scanner.js 21 KB


  1. (function (factory) {
  2. if (typeof module === "object" && typeof module.exports === "object") {
  3. var v = factory(require, exports);
  4. if (v !== undefined) module.exports = v;
  5. }
  6. else if (typeof define === "function" && define.amd) {
  7. define(["require", "exports"], factory);
  8. }
  9. })(function (require, exports) {
  10. /*---------------------------------------------------------------------------------------------
  11. * Copyright (c) Microsoft Corporation. All rights reserved.
  12. * Licensed under the MIT License. See License.txt in the project root for license information.
  13. *--------------------------------------------------------------------------------------------*/
  14. 'use strict';
  15. Object.defineProperty(exports, "__esModule", { value: true });
  16. exports.createScanner = void 0;
  /**
   * Creates a JSON scanner on the given text.
   * If ignoreTrivia is set, whitespaces or comments are ignored.
   *
   * The scanner is a closure over a cursor (`pos`) into `text`. Each call to
   * `scan()` consumes one token and returns its numeric SyntaxKind; the other
   * accessors describe the token that was scanned last.
   *
   * @param {string} text The text to tokenize.
   * @param {boolean} [ignoreTrivia=false] When true, `scan()` skips tokens whose
   *   kind lies in the range 12..15 (line comment, block comment, line break,
   *   whitespace trivia).
   * @returns {object} An object exposing `setPosition`, `getPosition`, `scan`,
   *   `getToken`, `getTokenValue`, `getTokenOffset`, `getTokenLength`,
   *   `getTokenStartLine`, `getTokenStartCharacter` and `getTokenError`.
   */
  function createScanner(text, ignoreTrivia = false) {
      const len = text.length;
      // Cursor: offset of the next character to read.
      let pos = 0;
      // Value of the last token (unescaped content for string literals).
      let value = '';
      // Offset at which the last token starts.
      let tokenOffset = 0;
      let token = 16 /* SyntaxKind.Unknown */;
      // Number of line breaks consumed so far.
      let lineNumber = 0;
      // Line number captured when the scan of the last token began.
      let tokenStartLine = 0;
      // Offset just after the most recently consumed line break.
      let tokenLineStartOffset = 0;
      // Value of tokenLineStartOffset captured when the scan of the last token began.
      let prevTokenLineStartOffset = 0;
      let scanError = 0 /* ScanError.None */;

      /**
       * Reads hexadecimal digits at the cursor and returns their numeric value.
       * With `exact` set, reads at most `count` digits; otherwise reads as many
       * as are available. Returns -1 when fewer than `count` digits were found.
       */
      function scanHexDigits(count, exact) {
          let digits = 0;
          let hexValue = 0;
          while (digits < count || !exact) {
              const ch = text.charCodeAt(pos);
              if (ch >= 48 /* CharacterCodes._0 */ && ch <= 57 /* CharacterCodes._9 */) {
                  hexValue = hexValue * 16 + ch - 48 /* CharacterCodes._0 */;
              }
              else if (ch >= 65 /* CharacterCodes.A */ && ch <= 70 /* CharacterCodes.F */) {
                  hexValue = hexValue * 16 + ch - 65 /* CharacterCodes.A */ + 10;
              }
              else if (ch >= 97 /* CharacterCodes.a */ && ch <= 102 /* CharacterCodes.f */) {
                  hexValue = hexValue * 16 + ch - 97 /* CharacterCodes.a */ + 10;
              }
              else {
                  break;
              }
              pos++;
              digits++;
          }
          if (digits < count) {
              hexValue = -1;
          }
          return hexValue;
      }

      /**
       * Moves the cursor to `newPosition` and clears the current token state.
       * Line tracking (line number / line start offsets) is left untouched.
       */
      function setPosition(newPosition) {
          pos = newPosition;
          value = '';
          tokenOffset = 0;
          token = 16 /* SyntaxKind.Unknown */;
          scanError = 0 /* ScanError.None */;
      }

      /**
       * Scans the digits of a number starting at the cursor (a leading minus sign
       * has already been consumed by the caller) and returns the scanned text.
       * Sets scanError to UnexpectedEndOfNumber when a fraction or exponent has
       * no digits.
       */
      function scanNumber() {
          const start = pos;
          if (text.charCodeAt(pos) === 48 /* CharacterCodes._0 */) {
              // A leading zero stands alone; no further integer digits are consumed.
              pos++;
          }
          else {
              pos++;
              while (pos < len && isDigit(text.charCodeAt(pos))) {
                  pos++;
              }
          }
          if (pos < len && text.charCodeAt(pos) === 46 /* CharacterCodes.dot */) {
              pos++;
              if (pos < len && isDigit(text.charCodeAt(pos))) {
                  pos++;
                  while (pos < len && isDigit(text.charCodeAt(pos))) {
                      pos++;
                  }
              }
              else {
                  scanError = 3 /* ScanError.UnexpectedEndOfNumber */;
                  return text.substring(start, pos);
              }
          }
          // `end` only advances past the exponent when the exponent is well formed.
          let end = pos;
          if (pos < len && (text.charCodeAt(pos) === 69 /* CharacterCodes.E */ || text.charCodeAt(pos) === 101 /* CharacterCodes.e */)) {
              pos++;
              if (pos < len && (text.charCodeAt(pos) === 43 /* CharacterCodes.plus */ || text.charCodeAt(pos) === 45 /* CharacterCodes.minus */)) {
                  pos++;
              }
              if (pos < len && isDigit(text.charCodeAt(pos))) {
                  pos++;
                  while (pos < len && isDigit(text.charCodeAt(pos))) {
                      pos++;
                  }
                  end = pos;
              }
              else {
                  scanError = 3 /* ScanError.UnexpectedEndOfNumber */;
              }
          }
          return text.substring(start, end);
      }

      /**
       * Scans a string body; the cursor is just after the opening quote.
       * Returns the content with escape sequences resolved and leaves the cursor
       * after the closing quote. Problems are reported through scanError.
       */
      function scanString() {
          let result = '';
          let start = pos;
          while (true) {
              if (pos >= len) {
                  result += text.substring(start, pos);
                  scanError = 2 /* ScanError.UnexpectedEndOfString */;
                  break;
              }
              const ch = text.charCodeAt(pos);
              if (ch === 34 /* CharacterCodes.doubleQuote */) {
                  result += text.substring(start, pos);
                  pos++;
                  break;
              }
              if (ch === 92 /* CharacterCodes.backslash */) {
                  // Flush the literal run collected so far, then decode the escape.
                  result += text.substring(start, pos);
                  pos++;
                  if (pos >= len) {
                      scanError = 2 /* ScanError.UnexpectedEndOfString */;
                      break;
                  }
                  const ch2 = text.charCodeAt(pos++);
                  switch (ch2) {
                      case 34 /* CharacterCodes.doubleQuote */:
                          result += '\"';
                          break;
                      case 92 /* CharacterCodes.backslash */:
                          result += '\\';
                          break;
                      case 47 /* CharacterCodes.slash */:
                          result += '/';
                          break;
                      case 98 /* CharacterCodes.b */:
                          result += '\b';
                          break;
                      case 102 /* CharacterCodes.f */:
                          result += '\f';
                          break;
                      case 110 /* CharacterCodes.n */:
                          result += '\n';
                          break;
                      case 114 /* CharacterCodes.r */:
                          result += '\r';
                          break;
                      case 116 /* CharacterCodes.t */:
                          result += '\t';
                          break;
                      case 117 /* CharacterCodes.u */: {
                          const ch3 = scanHexDigits(4, true);
                          if (ch3 >= 0) {
                              result += String.fromCharCode(ch3);
                          }
                          else {
                              scanError = 4 /* ScanError.InvalidUnicode */;
                          }
                          break;
                      }
                      default:
                          scanError = 5 /* ScanError.InvalidEscapeCharacter */;
                  }
                  start = pos;
                  continue;
              }
              if (ch >= 0 && ch <= 0x1f) {
                  if (isLineBreak(ch)) {
                      // A raw line break terminates the string without consuming it.
                      result += text.substring(start, pos);
                      scanError = 2 /* ScanError.UnexpectedEndOfString */;
                      break;
                  }
                  else {
                      scanError = 6 /* ScanError.InvalidCharacter */;
                      // mark as error but continue with string
                  }
              }
              pos++;
          }
          return result;
      }

      /**
       * Scans the next token (including trivia) and returns its SyntaxKind.
       */
      function scanNext() {
          value = '';
          scanError = 0 /* ScanError.None */;
          tokenOffset = pos;
          tokenStartLine = lineNumber;
          prevTokenLineStartOffset = tokenLineStartOffset;
          if (pos >= len) {
              // at the end
              tokenOffset = len;
              return token = 17 /* SyntaxKind.EOF */;
          }
          let code = text.charCodeAt(pos);
          // trivia: whitespace
          if (isWhiteSpace(code)) {
              do {
                  pos++;
                  value += String.fromCharCode(code);
                  code = text.charCodeAt(pos);
              } while (isWhiteSpace(code));
              return token = 15 /* SyntaxKind.Trivia */;
          }
          // trivia: newlines
          if (isLineBreak(code)) {
              pos++;
              value += String.fromCharCode(code);
              // Treat CR LF as a single line break.
              if (code === 13 /* CharacterCodes.carriageReturn */ && text.charCodeAt(pos) === 10 /* CharacterCodes.lineFeed */) {
                  pos++;
                  value += '\n';
              }
              lineNumber++;
              tokenLineStartOffset = pos;
              return token = 14 /* SyntaxKind.LineBreakTrivia */;
          }
          switch (code) {
              // tokens: []{}:,
              case 123 /* CharacterCodes.openBrace */:
                  pos++;
                  return token = 1 /* SyntaxKind.OpenBraceToken */;
              case 125 /* CharacterCodes.closeBrace */:
                  pos++;
                  return token = 2 /* SyntaxKind.CloseBraceToken */;
              case 91 /* CharacterCodes.openBracket */:
                  pos++;
                  return token = 3 /* SyntaxKind.OpenBracketToken */;
              case 93 /* CharacterCodes.closeBracket */:
                  pos++;
                  return token = 4 /* SyntaxKind.CloseBracketToken */;
              case 58 /* CharacterCodes.colon */:
                  pos++;
                  return token = 6 /* SyntaxKind.ColonToken */;
              case 44 /* CharacterCodes.comma */:
                  pos++;
                  return token = 5 /* SyntaxKind.CommaToken */;
              // strings
              case 34 /* CharacterCodes.doubleQuote */:
                  pos++;
                  value = scanString();
                  return token = 10 /* SyntaxKind.StringLiteral */;
              // comments
              case 47 /* CharacterCodes.slash */: {
                  // The cursor has not moved yet, so the comment text starts at `pos`
                  // itself; starting one character earlier would pull the preceding
                  // character into the token value.
                  const start = pos;
                  // Single-line comment
                  if (text.charCodeAt(pos + 1) === 47 /* CharacterCodes.slash */) {
                      pos += 2;
                      while (pos < len) {
                          if (isLineBreak(text.charCodeAt(pos))) {
                              break;
                          }
                          pos++;
                      }
                      value = text.substring(start, pos);
                      return token = 12 /* SyntaxKind.LineCommentTrivia */;
                  }
                  // Multi-line comment
                  if (text.charCodeAt(pos + 1) === 42 /* CharacterCodes.asterisk */) {
                      pos += 2;
                      const safeLength = len - 1; // For lookahead.
                      let commentClosed = false;
                      while (pos < safeLength) {
                          const ch = text.charCodeAt(pos);
                          if (ch === 42 /* CharacterCodes.asterisk */ && text.charCodeAt(pos + 1) === 47 /* CharacterCodes.slash */) {
                              pos += 2;
                              commentClosed = true;
                              break;
                          }
                          pos++;
                          if (isLineBreak(ch)) {
                              if (ch === 13 /* CharacterCodes.carriageReturn */ && text.charCodeAt(pos) === 10 /* CharacterCodes.lineFeed */) {
                                  pos++;
                              }
                              lineNumber++;
                              tokenLineStartOffset = pos;
                          }
                      }
                      if (!commentClosed) {
                          // The unterminated comment runs to the end of the text. The
                          // loop leaves pos at len - 1 or len, so clamp to len rather
                          // than incrementing, which could step past the end.
                          pos = len;
                          scanError = 1 /* ScanError.UnexpectedEndOfComment */;
                      }
                      value = text.substring(start, pos);
                      return token = 13 /* SyntaxKind.BlockCommentTrivia */;
                  }
                  // just a single slash
                  value += String.fromCharCode(code);
                  pos++;
                  return token = 16 /* SyntaxKind.Unknown */;
              }
              // numbers
              case 45 /* CharacterCodes.minus */:
                  value += String.fromCharCode(code);
                  pos++;
                  if (pos === len || !isDigit(text.charCodeAt(pos))) {
                      return token = 16 /* SyntaxKind.Unknown */;
                  }
              // found a minus, followed by a number so
              // we fall through to proceed with scanning
              // numbers
              case 48 /* CharacterCodes._0 */:
              case 49 /* CharacterCodes._1 */:
              case 50 /* CharacterCodes._2 */:
              case 51 /* CharacterCodes._3 */:
              case 52 /* CharacterCodes._4 */:
              case 53 /* CharacterCodes._5 */:
              case 54 /* CharacterCodes._6 */:
              case 55 /* CharacterCodes._7 */:
              case 56 /* CharacterCodes._8 */:
              case 57 /* CharacterCodes._9 */:
                  value += scanNumber();
                  return token = 11 /* SyntaxKind.NumericLiteral */;
              // literals and unknown symbols
              default:
                  // is a literal? Read the full word.
                  while (pos < len && isUnknownContentCharacter(code)) {
                      pos++;
                      code = text.charCodeAt(pos);
                  }
                  if (tokenOffset !== pos) {
                      value = text.substring(tokenOffset, pos);
                      // keywords: true, false, null
                      switch (value) {
                          case 'true': return token = 8 /* SyntaxKind.TrueKeyword */;
                          case 'false': return token = 9 /* SyntaxKind.FalseKeyword */;
                          case 'null': return token = 7 /* SyntaxKind.NullKeyword */;
                      }
                      return token = 16 /* SyntaxKind.Unknown */;
                  }
                  // Nothing was consumed: emit the single character as Unknown.
                  value += String.fromCharCode(code);
                  pos++;
                  return token = 16 /* SyntaxKind.Unknown */;
          }
      }

      /**
       * Returns true when `code` may be part of a run of unknown content, i.e. it
       * is neither whitespace, a line break, nor one of the characters } ] { [ " : , /
       */
      function isUnknownContentCharacter(code) {
          if (isWhiteSpace(code) || isLineBreak(code)) {
              return false;
          }
          switch (code) {
              case 125 /* CharacterCodes.closeBrace */:
              case 93 /* CharacterCodes.closeBracket */:
              case 123 /* CharacterCodes.openBrace */:
              case 91 /* CharacterCodes.openBracket */:
              case 34 /* CharacterCodes.doubleQuote */:
              case 58 /* CharacterCodes.colon */:
              case 44 /* CharacterCodes.comma */:
              case 47 /* CharacterCodes.slash */:
                  return false;
          }
          return true;
      }

      /**
       * Scans tokens until one is found whose kind is outside the trivia range
       * 12..15, and returns that kind.
       */
      function scanNextNonTrivia() {
          let result;
          do {
              result = scanNext();
          } while (result >= 12 /* SyntaxKind.LineCommentTrivia */ && result <= 15 /* SyntaxKind.Trivia */);
          return result;
      }

      return {
          setPosition: setPosition,
          getPosition: () => pos,
          scan: ignoreTrivia ? scanNextNonTrivia : scanNext,
          getToken: () => token,
          getTokenValue: () => value,
          getTokenOffset: () => tokenOffset,
          getTokenLength: () => pos - tokenOffset,
          getTokenStartLine: () => tokenStartLine,
          getTokenStartCharacter: () => tokenOffset - prevTokenLineStartOffset,
          getTokenError: () => scanError,
      };
  }
  363. exports.createScanner = createScanner;
  /**
   * Tells whether a character code is inline whitespace.
   * Only space (32) and tab (9) qualify; line breaks are not whitespace here.
   *
   * @param {number} ch Character code to test.
   * @returns {boolean} True for space or tab, false otherwise.
   */
  function isWhiteSpace(ch) {
      switch (ch) {
          case 32 /* CharacterCodes.space */:
          case 9 /* CharacterCodes.tab */:
              return true;
          default:
              return false;
      }
  }
  /**
   * Tells whether a character code is a line break character.
   * Line feed (10) and carriage return (13) are the only ones recognized.
   *
   * @param {number} ch Character code to test.
   * @returns {boolean} True for LF or CR, false otherwise.
   */
  function isLineBreak(ch) {
      if (ch === 10 /* CharacterCodes.lineFeed */) {
          return true;
      }
      return ch === 13 /* CharacterCodes.carriageReturn */;
  }
  /**
   * Tells whether a character code is an ASCII decimal digit ('0'..'9').
   *
   * @param {number} ch Character code to test.
   * @returns {boolean} True when 48 <= ch <= 57, false otherwise (including NaN).
   */
  function isDigit(ch) {
      const zero = 48 /* CharacterCodes._0 */;
      const nine = 57 /* CharacterCodes._9 */;
      // Written as two positive comparisons so that NaN still yields false.
      if (ch >= zero) {
          return ch <= nine;
      }
      return false;
  }
  /**
   * Bidirectional lookup table between character names and their character
   * codes: `CharacterCodes.slash === 47` and `CharacterCodes[47] === "slash"`.
   * Entries are registered in a fixed order: control/space characters, the
   * digits `_0`..`_9`, lowercase `a`..`z`, uppercase `A`..`Z`, then punctuation.
   */
  var CharacterCodes;
  (function (table) {
      // Registers both directions of the mapping: name -> code, then code -> name.
      const register = (name, code) => {
          table[name] = code;
          table[code] = name;
      };
      register("lineFeed", 10);
      register("carriageReturn", 13);
      register("space", 32);
      for (let digit = 0; digit <= 9; digit++) {
          register("_" + digit, 48 + digit);
      }
      for (let code = 97; code <= 122; code++) {
          register(String.fromCharCode(code), code);
      }
      for (let code = 65; code <= 90; code++) {
          register(String.fromCharCode(code), code);
      }
      const punctuation = [
          ["asterisk", 42],
          ["backslash", 92],
          ["closeBrace", 125],
          ["closeBracket", 93],
          ["colon", 58],
          ["comma", 44],
          ["dot", 46],
          ["doubleQuote", 34],
          ["minus", 45],
          ["openBrace", 123],
          ["openBracket", 91],
          ["plus", 43],
          ["slash", 47],
          ["formFeed", 12],
          ["tab", 9],
      ];
      for (const [name, code] of punctuation) {
          register(name, code);
      }
  })(CharacterCodes || (CharacterCodes = {}));
  456. });