// tokenize.js
  "use strict";
  // `tokenize` is a function declaration further down; it is hoisted, so it can be exported here.
  module.exports = tokenize;
  // Characters that terminate a token. Global flag: callers reset `lastIndex` before reuse.
  var delimRe = /[\s{}=;:[\],'"()<>]/g;
  // Complete double-quoted string literal; group 1 is the raw (still escaped) content.
  var stringDoubleRe = /(?:"([^"\\]*(?:\\.[^"\\]*)*)")/g;
  // Complete single-quoted string literal; group 1 is the raw (still escaped) content.
  var stringSingleRe = /(?:'([^'\\]*(?:\\.[^'\\]*)*)')/g;

  // Leading comment decoration ("*" / "/" runs with surrounding spaces) stripped from each comment line.
  var setCommentRe = /^ *[*/]+ */;
  // Leading decoration stripped from each comment line in alternate comment mode.
  var setCommentAltRe = /^\s*\*?\/*/;
  // Splits a comment body into lines.
  var setCommentSplitRe = /\n/g;
  // Matches a single whitespace character.
  var whitespaceRe = /\s/;
  // Matches a backslash followed by at most one character (group 1).
  var unescapeRe = /\\(.?)/g;

  // Escape letters that translate to a control character.
  var unescapeMap = {
      "0": "\0",
      "r": "\r",
      "n": "\n",
      "t": "\t"
  };
  /**
   * Unescapes a string.
   *
   * A backslash followed by `0`, `r`, `n` or `t` becomes the matching control
   * character from `unescapeMap`. An escaped backslash or quote (`\\`, `\"`, `\'`)
   * yields the escaped character itself. Any other escape sequence, and a lone
   * trailing backslash, is removed.
   * @param {string} str String to unescape
   * @returns {string} Unescaped string
   * @property {Object.<string,string>} map Special characters map
   * @memberof tokenize
   */
  function unescape(str) {
      return str.replace(unescapeRe, function($0, $1) {
          switch ($1) {
              case "\\":
              case "\"":
              case "'":
              case "":
                  // The string regexes accept \" and \' inside literals, so the quote
                  // must survive unescaping instead of being dropped.
                  return $1;
              default:
                  return unescapeMap[$1] || "";
          }
      });
  }

  tokenize.unescape = unescape;
  /**
   * Gets the next token and advances.
   * @typedef TokenizerHandleNext
   * @type {function}
   * @returns {string|null} Next token or `null` on eof
   */
  /**
   * Peeks for the next token.
   * @typedef TokenizerHandlePeek
   * @type {function}
   * @returns {string|null} Next token or `null` on eof
   */
  /**
   * Pushes a token back to the stack.
   * @typedef TokenizerHandlePush
   * @type {function}
   * @param {string} token Token
   * @returns {undefined}
   */
  /**
   * Skips the next token.
   * @typedef TokenizerHandleSkip
   * @type {function}
   * @param {string} expected Expected token
   * @param {boolean} [optional=false] If optional
   * @returns {boolean} Whether the token matched
   * @throws {Error} If the token didn't match and is not optional
   */
  /**
   * Gets the comment on the previous line or, alternatively, the line comment on the specified line.
   * @typedef TokenizerHandleCmnt
   * @type {function}
   * @param {number} [line] Line number
   * @returns {string|null} Comment text or `null` if none
   */
  /**
   * Handle object returned from {@link tokenize}.
   * @interface ITokenizerHandle
   * @property {TokenizerHandleNext} next Gets the next token and advances (`null` on eof)
   * @property {TokenizerHandlePeek} peek Peeks for the next token (`null` on eof)
   * @property {TokenizerHandlePush} push Pushes a token back to the stack
   * @property {TokenizerHandleSkip} skip Skips a token, returns its presence and advances or, if non-optional and not present, throws
   * @property {TokenizerHandleCmnt} cmnt Gets the comment on the previous line or the line comment on the specified line, if any
   * @property {number} line Current line number
   */
  /**
   * Tokenizes the given .proto source and returns an object with useful utility functions.
   *
   * The returned handle reads tokens lazily from `source`. Documentation comments
   * encountered while scanning are recorded by line number and can be fetched
   * through `cmnt`.
   * @param {string} source Source contents
   * @param {boolean} alternateCommentMode Whether we should activate alternate comment parsing mode.
   * @returns {ITokenizerHandle} Tokenizer handle
   */
  function tokenize(source, alternateCommentMode) {
      /* eslint-disable callback-return */
      source = source.toString();

      var offset = 0,             // index of the next unread character
          length = source.length,
          line = 1,               // current line number (1-based)
          lastCommentLine = 0,    // line of the most recently recorded comment
          comments = {};          // recorded comments, keyed by line number

      // Pending tokens; `next` drains these (from the front) before reading the source.
      var stack = [];

      // Quote character of the string literal whose body is read next, if any.
      var stringDelim = null;

      /* istanbul ignore next */
      /**
       * Creates an error for illegal syntax.
       * @param {string} subject Subject
       * @returns {Error} Error created
       * @inner
       */
      function illegal(subject) {
          return Error("illegal " + subject + " (line " + line + ")");
      }

      /**
       * Reads a string till its end.
       *
       * Called after the opening quote was returned as a token. The closing quote
       * is queued so that it is returned as the token following the string body.
       * @returns {string} String read
       * @throws {Error} If no complete string literal is found
       * @inner
       */
      function readString() {
          var re = stringDelim === "'" ? stringSingleRe : stringDoubleRe;
          // The opening quote was already consumed, so start matching one character back.
          re.lastIndex = offset - 1;
          var match = re.exec(source);
          if (!match)
              throw illegal("string");
          offset = re.lastIndex;
          push(stringDelim);
          stringDelim = null;
          return unescape(match[1]);
      }

      /**
       * Gets the character at `pos` within the source.
       * @param {number} pos Position
       * @returns {string} Character
       * @inner
       */
      function charAt(pos) {
          return source.charAt(pos);
      }

      /**
       * Sets the current comment text.
       *
       * The character at `start` is stored as the comment type; the text begins
       * right after it. The comment is recorded under the current line number.
       * @param {number} start Start offset
       * @param {number} end End offset
       * @param {boolean} isLeading set if a leading comment
       * @returns {undefined}
       * @inner
       */
      function setComment(start, end, isLeading) {
          var comment = {
              type: source.charAt(start++),
              lineEmpty: false,
              leading: isLeading
          };
          var lookback;
          if (alternateCommentMode) {
              lookback = 2;  // alternate comment parsing: "//" or "/*"
          } else {
              lookback = 3;  // "///" or "/**"
          }
          // Walk backwards from the comment opener: the line counts as empty when
          // only spaces or tabs precede the comment on its line.
          var commentOffset = start - lookback,
              c;
          do {
              if (--commentOffset < 0 ||
                      (c = source.charAt(commentOffset)) === "\n") {
                  comment.lineEmpty = true;
                  break;
              }
          } while (c === " " || c === "\t");
          var lines = source
              .substring(start, end)
              .split(setCommentSplitRe);
          for (var i = 0; i < lines.length; ++i)
              lines[i] = lines[i]
                  .replace(alternateCommentMode ? setCommentAltRe : setCommentRe, "")
                  .trim();
          comment.text = lines
              .join("\n")
              .trim();

          comments[line] = comment;
          lastCommentLine = line;
      }

      /**
       * Tests whether the text from `startOffset` to the end of that line starts
       * with `//`, optionally preceded by whitespace.
       * @param {number} startOffset Offset to start looking at
       * @returns {boolean} `true` if the remaining line is a double-slash comment
       * @inner
       */
      function isDoubleSlashCommentLine(startOffset) {
          var endOffset = findEndOfLine(startOffset);

          // see if remaining line matches comment pattern
          var lineText = source.substring(startOffset, endOffset);
          var isComment = /^\s*\/\//.test(lineText);
          return isComment;
      }

      /**
       * Finds the end of the line containing `cursor`.
       * @param {number} cursor Offset within the line
       * @returns {number} Offset of the next `\n`, or the source length if there is none
       * @inner
       */
      function findEndOfLine(cursor) {
          // find end of cursor's line
          var endOffset = cursor;
          while (endOffset < length && charAt(endOffset) !== "\n") {
              endOffset++;
          }
          return endOffset;
      }

      /**
       * Obtains the next token.
       *
       * Skips whitespace and comments, recording documentation comments on the way.
       * @returns {string|null} Next token or `null` on eof
       * @throws {Error} On an unterminated comment or string
       * @inner
       */
      function next() {
          if (stack.length > 0)
              return stack.shift();
          if (stringDelim)
              return readString();
          var repeat,
              curr,
              start,
              close,
              scan,
              isDoc,
              isLeadingComment = offset === 0;
          do {
              if (offset === length)
                  return null;
              repeat = false;
              while (whitespaceRe.test(curr = charAt(offset))) {
                  if (curr === "\n") {
                      isLeadingComment = true;
                      ++line;
                  }
                  if (++offset === length)
                      return null;
              }

              if (charAt(offset) === "/") {
                  if (++offset === length) {
                      throw illegal("comment");
                  }
                  if (charAt(offset) === "/") { // Line
                      if (!alternateCommentMode) {
                          // check for triple-slash comment
                          isDoc = charAt(start = offset + 1) === "/";

                          while (charAt(++offset) !== "\n") {
                              if (offset === length) {
                                  // A doc comment on the last line (no trailing newline)
                                  // must still be recorded before signalling eof.
                                  if (isDoc) {
                                      setComment(start, offset, isLeadingComment);
                                  }
                                  return null;
                              }
                          }
                          ++offset;
                          if (isDoc) {
                              setComment(start, offset - 1, isLeadingComment);
                              // Trailing comment cannot be multi-line,
                              // so leading comment state should be reset to handle potential next comments
                              isLeadingComment = true;
                          }
                          ++line;
                          repeat = true;
                      } else {
                          // check for double-slash comments, consolidating consecutive lines
                          start = offset;
                          isDoc = false;
                          if (isDoubleSlashCommentLine(offset - 1)) {
                              isDoc = true;
                              do {
                                  offset = findEndOfLine(offset);
                                  if (offset === length) {
                                      break;
                                  }
                                  offset++;
                                  if (!isLeadingComment) {
                                      // Trailing comment cannot be multi-line
                                      break;
                                  }
                              } while (isDoubleSlashCommentLine(offset));
                          } else {
                              offset = Math.min(length, findEndOfLine(offset) + 1);
                          }
                          if (isDoc) {
                              setComment(start, offset, isLeadingComment);
                              isLeadingComment = true;
                          }
                          line++;
                          repeat = true;
                      }
                  } else if (charAt(offset) === "*") { /* Block */
                      // check for /** (regular comment mode) or /* (alternate comment mode)
                      start = offset + 1;
                      isDoc = alternateCommentMode || charAt(start) === "*";
                      // Look for the terminator only after the opening "/*", so the opening
                      // "*" cannot pair with a directly following "/" (as in "/*/").
                      close = source.indexOf("*/", start);
                      // Keep the line counter in sync with the newlines inside the comment.
                      for (scan = start; scan < (close < 0 ? length : close); ++scan) {
                          if (charAt(scan) === "\n") {
                              ++line;
                          }
                      }
                      if (close < 0) {
                          offset = length;
                          throw illegal("comment");
                      }
                      offset = close + 2;
                      if (isDoc) {
                          setComment(start, close, isLeadingComment);
                          isLeadingComment = true;
                      }
                      repeat = true;
                  } else {
                      return "/";
                  }
              }
          } while (repeat);

          // offset !== length if we got here

          var end = offset;
          // delimRe is global, so reset its state before testing single characters.
          delimRe.lastIndex = 0;
          var delim = delimRe.test(charAt(end++));
          if (!delim)
              while (end < length && !delimRe.test(charAt(end)))
                  ++end;
          var token = source.substring(offset, offset = end);
          if (token === "\"" || token === "'")
              stringDelim = token;
          return token;
      }

      /**
       * Pushes a token back to the stack.
       * @param {string} token Token
       * @returns {undefined}
       * @inner
       */
      function push(token) {
          stack.push(token);
      }

      /**
       * Peeks for the next token.
       * @returns {string|null} Token or `null` on eof
       * @inner
       */
      function peek() {
          if (!stack.length) {
              var token = next();
              if (token === null)
                  return null;
              push(token);
          }
          return stack[0];
      }

      /**
       * Skips a token.
       * @param {string} expected Expected token
       * @param {boolean} [optional=false] Whether the token is optional
       * @returns {boolean} `true` when skipped, `false` if not
       * @throws {Error} When a required token is not present
       * @inner
       */
      function skip(expected, optional) {
          var actual = peek(),
              equals = actual === expected;
          if (equals) {
              next();
              return true;
          }
          if (!optional)
              throw illegal("token '" + actual + "', '" + expected + "' expected");
          return false;
      }

      /**
       * Gets a comment.
       *
       * Without an argument, returns the leading comment recorded on the line
       * before the current one. With `trailingLine`, returns the trailing comment
       * recorded on that line. The looked-up entry is removed either way.
       * @param {number} [trailingLine] Line number if looking for a trailing comment
       * @returns {string|null} Comment text
       * @inner
       */
      function cmnt(trailingLine) {
          var ret = null;
          var comment;
          if (trailingLine === undefined) {
              comment = comments[line - 1];
              delete comments[line - 1];
              if (comment && (alternateCommentMode || comment.type === "*" || comment.lineEmpty)) {
                  ret = comment.leading ? comment.text : null;
              }
          } else {
              /* istanbul ignore else */
              if (lastCommentLine < trailingLine) {
                  // Scan ahead so that a comment on the requested line gets recorded.
                  peek();
              }
              comment = comments[trailingLine];
              delete comments[trailingLine];
              if (comment && !comment.lineEmpty && (alternateCommentMode || comment.type === "/")) {
                  ret = comment.leading ? null : comment.text;
              }
          }
          return ret;
      }

      return Object.defineProperty({
          next: next,
          peek: peek,
          push: push,
          skip: skip,
          cmnt: cmnt
      }, "line", {
          get: function() { return line; }
      });
      /* eslint-enable callback-return */
  }