You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

tokenizer.js 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", {
  3. value: true
  4. });
  5. exports.tokenize = tokenize;
  6. exports.tokens = exports.keywords = void 0;
  7. var _helperFsm = require("@webassemblyjs/helper-fsm");
  8. var _helperCodeFrame = require("@webassemblyjs/helper-code-frame");
  9. // eslint-disable-next-line
  /**
   * Renders a code frame for a single source position.
   *
   * @param {string} source Text handed to `codeFrameFromSource`.
   * @param {number} line Line stored as `loc.start.line`.
   * @param {number} column Column stored as `loc.start.column`.
   * @returns {string} The frame produced by `codeFrameFromSource`, wrapped in
   *   one leading and one trailing newline.
   */
  function getCodeFrame(source, line, column) {
    var position = {
      line: line,
      column: column
    };
    // Only a start position is supplied; no end is passed to the helper.
    var frame = (0, _helperCodeFrame.codeFrameFromSource)(source, {
      start: position
    });
    return "\n" + frame + "\n";
  }
  // Single-character classifiers used by the tokenizer.
  var WHITESPACE = /\s/;
  var PARENS = /\(|\)/;
  // Characters allowed in bare names such as keywords and instruction names.
  var LETTERS = /[a-z0-9_/]/i;
  // Characters allowed after the `$` sigil of an identifier.
  var idchar = /[a-z0-9!#$%&*+./:<=>?@\\[\]^_`|~-]/i;
  var valtypes = ["i32", "i64", "f32", "f64"];
  // Characters that route the tokenizer into number-literal handling.
  // Inside a character class `|` is a literal pipe, not alternation, so the
  // previous `/[0-9|.|_]/` wrongly classified "|" as the start of a number.
  var NUMBERS = /[0-9._]/;
  // Tested against a three-character lookahead to catch `nan` / `inf`.
  var NUMBER_KEYWORDS = /nan|inf/;
  /**
   * Tells whether a string begins with a line-feed or carriage-return.
   *
   * @param {string} char String whose first UTF-16 code unit is inspected.
   * @returns {boolean} `true` when that code unit is 10 or 13.
   */
  function isNewLine(char) {
    switch (char.charCodeAt(0)) {
      case 10: // "\n"
      case 13: // "\r"
        return true;
      default:
        return false;
    }
  }
  /**
   * Builds a token record.
   *
   * @param {string} type Stored as `token.type`.
   * @param {*} value Stored as `token.value`.
   * @param {Object} start Stored as `token.loc.start`.
   * @param {Object} end Stored as `token.loc.end`.
   * @param {Object} [opts] Optional fifth argument; attached as `token.opts`
   *   (same object, not a copy) only when it has at least one own enumerable key.
   * @returns {Object} `{ type, value, loc: { start, end } }`, plus `opts` when given.
   */
  function Token(type, value, start, end) {
    // The fifth argument is read from `arguments` so the declared arity stays 4.
    var extras = arguments[4];
    if (extras === undefined) {
      extras = {};
    }
    var result = {
      type: type,
      value: value,
      loc: {
        start: start,
        end: end
      }
    };
    if (Object.keys(extras).length !== 0) {
      result.opts = extras;
    }
    return result;
  }
  // Token type names. Every key maps to a string equal to the key itself.
  var tokenTypes = [
    "openParen",
    "closeParen",
    "number",
    "string",
    "name",
    "identifier",
    "valtype",
    "dot",
    "comment",
    "equal",
    "keyword"
  ].reduce(function (table, typeName) {
    table[typeName] = typeName;
    return table;
  }, {});
  // Words the tokenizer emits as `keyword` tokens. Every key maps to a string
  // equal to the key itself.
  var keywords = [
    "module",
    "func",
    "param",
    "result",
    "export",
    "loop",
    "block",
    "if",
    "then",
    "else",
    "call",
    "call_indirect",
    "import",
    "memory",
    "table",
    "global",
    "anyfunc",
    "mut",
    "data",
    "type",
    "elem",
    "start",
    "offset"
  ].reduce(function (table, word) {
    table[word] = word;
    return table;
  }, {});
  83. exports.keywords = keywords;
  // Character passed as `allowedSeparator` to the digit-run transitions below.
  var NUMERIC_SEPARATOR = "_";

  /**
   * Build the FSM for number literals.
   *
   * States: START -> (AFTER_SIGN) -> DEC / DEC_FRAC / DEC_SIGNED_EXP / DEC_EXP
   * for decimal literals, HEX / HEX_FRAC / HEX_SIGNED_EXP / HEX_EXP for `0x`
   * literals, NAN_HEX for `nan:0x` payloads, and STOP after `nan` / `inf`.
   * The machine starts in "START" and "STOP" is passed as the third argument.
   *
   * NOTE(review): `n` presumably is the number of characters a transition
   * inspects and consumes (it equals the length of each multi-character
   * pattern) -- confirm against @webassemblyjs/helper-fsm.
   *
   * Fixes relative to the previous table:
   * - `|` inside a character class is a literal pipe, not alternation, so
   *   `/[0-9|A-F|a-f]/` and `/[0-9|+|-]/` accepted "|" as a digit/sign.
   * - `/p|P|/` ended with an empty alternative, which matches any input, so
   *   the HEX_FRAC -> HEX_SIGNED_EXP transition fired on every character.
   */
  var numberLiteralFSM = new _helperFsm.FSM({
    START: [
      (0, _helperFsm.makeTransition)(/-|\+/, "AFTER_SIGN"),
      (0, _helperFsm.makeTransition)(/nan:0x/, "NAN_HEX", {
        n: 6
      }),
      (0, _helperFsm.makeTransition)(/nan|inf/, "STOP", {
        n: 3
      }),
      (0, _helperFsm.makeTransition)(/0x/, "HEX", {
        n: 2
      }),
      (0, _helperFsm.makeTransition)(/[0-9]/, "DEC"),
      (0, _helperFsm.makeTransition)(/\./, "DEC_FRAC")
    ],
    // Same as START, minus a second sign.
    AFTER_SIGN: [
      (0, _helperFsm.makeTransition)(/nan:0x/, "NAN_HEX", {
        n: 6
      }),
      (0, _helperFsm.makeTransition)(/nan|inf/, "STOP", {
        n: 3
      }),
      (0, _helperFsm.makeTransition)(/0x/, "HEX", {
        n: 2
      }),
      (0, _helperFsm.makeTransition)(/[0-9]/, "DEC"),
      (0, _helperFsm.makeTransition)(/\./, "DEC_FRAC")
    ],
    DEC_FRAC: [
      (0, _helperFsm.makeTransition)(/[0-9]/, "DEC_FRAC", {
        allowedSeparator: NUMERIC_SEPARATOR
      }),
      (0, _helperFsm.makeTransition)(/e|E/, "DEC_SIGNED_EXP")
    ],
    DEC: [
      (0, _helperFsm.makeTransition)(/[0-9]/, "DEC", {
        allowedSeparator: NUMERIC_SEPARATOR
      }),
      (0, _helperFsm.makeTransition)(/\./, "DEC_FRAC"),
      (0, _helperFsm.makeTransition)(/e|E/, "DEC_SIGNED_EXP")
    ],
    DEC_SIGNED_EXP: [
      (0, _helperFsm.makeTransition)(/\+|-/, "DEC_EXP"),
      (0, _helperFsm.makeTransition)(/[0-9]/, "DEC_EXP")
    ],
    DEC_EXP: [
      (0, _helperFsm.makeTransition)(/[0-9]/, "DEC_EXP", {
        allowedSeparator: NUMERIC_SEPARATOR
      })
    ],
    HEX: [
      (0, _helperFsm.makeTransition)(/[0-9A-Fa-f]/, "HEX", {
        allowedSeparator: NUMERIC_SEPARATOR
      }),
      (0, _helperFsm.makeTransition)(/\./, "HEX_FRAC"),
      (0, _helperFsm.makeTransition)(/p|P/, "HEX_SIGNED_EXP")
    ],
    HEX_FRAC: [
      (0, _helperFsm.makeTransition)(/[0-9A-Fa-f]/, "HEX_FRAC", {
        allowedSeparator: NUMERIC_SEPARATOR
      }),
      (0, _helperFsm.makeTransition)(/p|P/, "HEX_SIGNED_EXP")
    ],
    HEX_SIGNED_EXP: [
      // `-` is last in the class so it is a literal, not a range operator.
      (0, _helperFsm.makeTransition)(/[0-9+-]/, "HEX_EXP")
    ],
    HEX_EXP: [
      (0, _helperFsm.makeTransition)(/[0-9]/, "HEX_EXP", {
        allowedSeparator: NUMERIC_SEPARATOR
      })
    ],
    NAN_HEX: [
      (0, _helperFsm.makeTransition)(/[0-9A-Fa-f]/, "NAN_HEX", {
        allowedSeparator: NUMERIC_SEPARATOR
      })
    ],
    STOP: []
  }, "START", "STOP");
  /**
   * Splits WebAssembly text-format source into a flat array of tokens.
   *
   * Recognised, in the order they are tried: `;;` line comments, `(; ;)` block
   * comments, parentheses, `=`, newlines and other whitespace (skipped),
   * `$identifiers`, number literals (delegated to `numberLiteralFSM`),
   * double-quoted strings, and bare words (keywords, value types, names and
   * `a.b` member accesses).
   *
   * Location conventions kept from the previous implementation: columns start
   * at 1 on the first line and at 0 on every line reached through a newline in
   * the main loop; tokens pushed with an explicit `endColumn` (comments,
   * identifiers, strings) use the column just past the token, others use the
   * column of their last character.
   * NOTE(review): parenthesis and `=` tokens are pushed before being consumed
   * and therefore report `column - 1`; left as is because consumers may rely
   * on it. "\r\n" still counts as two line breaks.
   *
   * @param {string} input Source text to tokenize.
   * @returns {Array<Object>} Tokens as produced by `Token`.
   * @throws {Error} On a character no rule accepts, on a line break inside a
   *   string, and on a string or block comment that is still open at the end
   *   of the input.
   */
  function tokenize(input) {
    var current = 0;
    var char = input[current];

    // Used by SourceLocation
    var column = 1;
    var line = 1;
    var tokens = [];

    /** Builds the error reported for the character under the cursor. */
    function unexpectedCharacter() {
      return new Error(getCodeFrame(input, line, column) + "Unexpected character " + JSON.stringify(char));
    }

    /**
     * Creates a pushToken function for a given type.
     *
     * The returned function accepts the token value and an options object.
     * `startColumn`, `endColumn` and `startLine` override the computed
     * location; every other option is forwarded to `Token`.
     */
    function pushToken(type) {
      return function (v, opts) {
        if (opts === undefined) {
          opts = {};
        }
        var length = String(v).length;

        // Explicit `undefined` checks: 0 is a valid column right after a
        // newline, and `||` used to discard it in favour of the fallback.
        var startColumn = opts.startColumn !== undefined ? opts.startColumn : column - length;
        var endColumn = opts.endColumn !== undefined ? opts.endColumn : startColumn + length - 1;
        var startLine = opts.startLine !== undefined ? opts.startLine : line;

        // Forward the remaining options on a fresh object so the caller's
        // object is left untouched.
        var extra = {};
        Object.keys(opts).forEach(function (key) {
          if (key !== "startColumn" && key !== "endColumn" && key !== "startLine") {
            extra[key] = opts[key];
          }
        });

        var start = {
          line: startLine,
          column: startColumn
        };
        var end = {
          line: line,
          column: endColumn
        };
        tokens.push(Token(type, v, start, end, extra));
      };
    }

    /**
     * Functions to save newly encountered tokens
     */
    var pushCloseParenToken = pushToken(tokenTypes.closeParen);
    var pushOpenParenToken = pushToken(tokenTypes.openParen);
    var pushNumberToken = pushToken(tokenTypes.number);
    var pushValtypeToken = pushToken(tokenTypes.valtype);
    var pushNameToken = pushToken(tokenTypes.name);
    var pushIdentifierToken = pushToken(tokenTypes.identifier);
    var pushKeywordToken = pushToken(tokenTypes.keyword);
    var pushDotToken = pushToken(tokenTypes.dot);
    var pushStringToken = pushToken(tokenTypes.string);
    var pushCommentToken = pushToken(tokenTypes.comment);
    var pushEqualToken = pushToken(tokenTypes.equal);

    /**
     * Can be used to look at the next character(s).
     *
     * The default behavior `lookahead()` simply returns the next character without consuming it.
     * Letters are always returned in lowercase.
     *
     * @param {number} length How many characters to query. Default = 1
     * @param {number} offset How many characters to skip forward from current one. Default = 1
     */
    function lookahead(length, offset) {
      if (length === undefined) {
        length = 1;
      }
      if (offset === undefined) {
        offset = 1;
      }
      return input.substring(current + offset, current + offset + length).toLowerCase();
    }

    /**
     * Advances the cursor in the input by a certain amount
     *
     * @param {number} amount How many characters to consume. Default = 1
     */
    function eatCharacter(amount) {
      if (amount === undefined) {
        amount = 1;
      }
      column += amount;
      current += amount;
      char = input[current];
    }

    while (current < input.length) {
      // ;; line comment, runs to the next line break or the end of the input
      if (char === ";" && lookahead() === ";") {
        var lineCommentStart = column;
        eatCharacter(2);
        var lineCommentText = "";

        // `char` is undefined once the input is exhausted; checking it first
        // keeps `isNewLine` from being called on a non-string.
        while (char !== undefined && !isNewLine(char)) {
          lineCommentText += char;
          eatCharacter();
        }
        pushCommentToken(lineCommentText, {
          type: "leading",
          startColumn: lineCommentStart,
          endColumn: column
        });
        continue;
      }

      // (; block comment ;)
      if (char === "(" && lookahead() === ";") {
        var blockCommentStartColumn = column;
        var blockCommentStartLine = line;
        eatCharacter(2);
        var blockCommentText = "";
        while (true) {
          if (char === undefined) {
            throw new Error(getCodeFrame(input, line, column) + "Unterminated block comment");
          }
          if (char === ";" && lookahead() === ")") {
            eatCharacter(2);
            break;
          }

          // Inspect the character being consumed (not the one after it) so a
          // line break directly after "(;" is counted too. The reset happens
          // before the break is eaten, which leaves the following character
          // on column 1 exactly as before.
          if (isNewLine(char)) {
            line++;
            column = 0;
          }
          blockCommentText += char;
          eatCharacter();
        }
        pushCommentToken(blockCommentText, {
          type: "block",
          startLine: blockCommentStartLine,
          startColumn: blockCommentStartColumn,
          endColumn: column
        });
        continue;
      }

      if (char === "(") {
        pushOpenParenToken(char);
        eatCharacter();
        continue;
      }

      if (char === "=") {
        pushEqualToken(char);
        eatCharacter();
        continue;
      }

      if (char === ")") {
        pushCloseParenToken(char);
        eatCharacter();
        continue;
      }

      if (isNewLine(char)) {
        line++;
        eatCharacter();
        column = 0;
        continue;
      }

      if (WHITESPACE.test(char)) {
        eatCharacter();
        continue;
      }

      // $identifier
      if (char === "$") {
        var identifierStart = column;
        eatCharacter();
        var identifier = "";

        // RegExp#test stringifies its argument, so `undefined` would match as
        // "undefined" and loop forever at the end of the input.
        while (char !== undefined && idchar.test(char)) {
          identifier += char;
          eatCharacter();
        }
        pushIdentifierToken(identifier, {
          startColumn: identifierStart,
          endColumn: column
        });
        continue;
      }

      // Number literal, including signed values and nan / inf
      if (NUMBERS.test(char) || NUMBER_KEYWORDS.test(lookahead(3, 0)) || char === "-" || char === "+") {
        var numberStart = column;
        var numberText = numberLiteralFSM.run(input.slice(current));
        if (numberText === "") {
          throw unexpectedCharacter();
        }

        // Pushed before the cursor moves, hence the explicit start column.
        pushNumberToken(numberText, {
          startColumn: numberStart
        });
        eatCharacter(numberText.length);

        // A number must be followed by a parenthesis, whitespace or the end
        // of the input.
        if (char && !PARENS.test(char) && !WHITESPACE.test(char)) {
          throw unexpectedCharacter();
        }
        continue;
      }

      // "string"
      if (char === '"') {
        var stringStart = column;
        var stringText = "";
        eatCharacter(); // opening "

        while (char !== '"') {
          if (char === undefined) {
            throw new Error(getCodeFrame(input, line, column) + "Unterminated string literal");
          }
          if (isNewLine(char)) {
            throw unexpectedCharacter();
          }
          stringText += char;
          eatCharacter();
        }
        eatCharacter(); // closing "

        pushStringToken(stringText, {
          startColumn: stringStart,
          endColumn: column
        });
        continue;
      }

      // Bare word: keyword, value type, name or member access
      if (LETTERS.test(char)) {
        var word = "";
        var wordStart = column;
        while (char && LETTERS.test(char)) {
          word += char;
          eatCharacter();
        }

        /*
         * Handle MemberAccess
         */
        if (char === ".") {
          var dotStartColumn = column;
          if (valtypes.indexOf(word) !== -1) {
            pushValtypeToken(word, {
              startColumn: wordStart
            });
          } else {
            pushNameToken(word);
          }
          eatCharacter();
          word = "";
          var nameStartColumn = column;

          // Guard against the end of the input, same as the loop above.
          while (char && LETTERS.test(char)) {
            word += char;
            eatCharacter();
          }
          pushDotToken(".", {
            startColumn: dotStartColumn
          });
          pushNameToken(word, {
            startColumn: nameStartColumn
          });
          continue;
        }

        /*
         * Handle keywords
         */
        // $FlowIgnore
        if (typeof keywords[word] === "string") {
          pushKeywordToken(word, {
            startColumn: wordStart
          });
          continue;
        }

        /*
         * Handle types
         */
        if (valtypes.indexOf(word) !== -1) {
          pushValtypeToken(word, {
            startColumn: wordStart
          });
          continue;
        }

        /*
         * Handle literals
         */
        pushNameToken(word, {
          startColumn: wordStart
        });
        continue;
      }

      throw unexpectedCharacter();
    }
    return tokens;
  }
  380. var tokens = tokenTypes;
  381. exports.tokens = tokens;