scanner.js 19 KB


  1. /*---------------------------------------------------------------------------------------------
  2. * Copyright (c) Microsoft Corporation. All rights reserved.
  3. * Licensed under the MIT License. See License.txt in the project root for license information.
  4. *--------------------------------------------------------------------------------------------*/
  5. 'use strict';
  /**
   * Creates a JSON scanner on the given text.
   * If ignoreTrivia is set, whitespace, line breaks and comments are skipped by `scan()`.
   *
   * Token kinds are returned as numbers (the inline comments name the matching
   * `SyntaxKind` member); scan problems are reported through `getTokenError()`
   * as `ScanError` numbers rather than by throwing.
   *
   * @param {string} text - The text to tokenize.
   * @param {boolean} [ignoreTrivia=false] - When true, `scan()` skips token kinds 12..15
   *   (line comment, block comment, line break, whitespace).
   * @returns {object} A scanner exposing `setPosition`, `getPosition`, `scan`, `getToken`,
   *   `getTokenValue`, `getTokenOffset`, `getTokenLength`, `getTokenStartLine`,
   *   `getTokenStartCharacter` and `getTokenError`.
   */
  export function createScanner(text, ignoreTrivia = false) {
    const len = text.length;
    // Scanner state shared by all nested helpers.
    let pos = 0; // offset of the next character to read
    let value = ''; // value of the current token
    let tokenOffset = 0; // offset at which the current token starts
    let token = 16 /* SyntaxKind.Unknown */;
    let lineNumber = 0; // number of line breaks consumed so far
    let lineStartOffset = 0; // despite its name: the line number on which the current token starts
    let tokenLineStartOffset = 0; // offset just behind the last consumed line break
    let prevTokenLineStartOffset = 0; // tokenLineStartOffset as it was when the current token started
    let scanError = 0 /* ScanError.None */;

    /**
     * Reads hexadecimal digits at `pos` and advances past them.
     * Stops after `count` digits when `exact` is set, otherwise at the first non-hex character.
     * Returns the numeric value, or -1 when fewer than `count` digits were found.
     */
    function scanHexDigits(count, exact) {
      let digits = 0;
      let hexValue = 0;
      while (digits < count || !exact) {
        const ch = text.charCodeAt(pos);
        if (ch >= 48 /* CharacterCodes._0 */ && ch <= 57 /* CharacterCodes._9 */) {
          hexValue = hexValue * 16 + ch - 48 /* CharacterCodes._0 */;
        }
        else if (ch >= 65 /* CharacterCodes.A */ && ch <= 70 /* CharacterCodes.F */) {
          hexValue = hexValue * 16 + ch - 65 /* CharacterCodes.A */ + 10;
        }
        else if (ch >= 97 /* CharacterCodes.a */ && ch <= 102 /* CharacterCodes.f */) {
          hexValue = hexValue * 16 + ch - 97 /* CharacterCodes.a */ + 10;
        }
        else {
          // Also reached at the end of the text, where charCodeAt yields NaN.
          break;
        }
        pos++;
        digits++;
      }
      if (digits < count) {
        hexValue = -1;
      }
      return hexValue;
    }

    /**
     * Moves the scanner to `newPosition` and clears the current token state.
     * Line tracking (`lineNumber` and the line start offsets) is not reset.
     */
    function setPosition(newPosition) {
      pos = newPosition;
      value = '';
      tokenOffset = 0;
      token = 16 /* SyntaxKind.Unknown */;
      scanError = 0 /* ScanError.None */;
    }

    /**
     * Scans the digits, optional fraction and optional exponent of a number.
     * The caller has verified that `pos` is on a digit (a leading minus is handled by the caller).
     * Returns the scanned text; sets `scanError` when a fraction or exponent lacks digits.
     */
    function scanNumber() {
      const start = pos;
      if (text.charCodeAt(pos) === 48 /* CharacterCodes._0 */) {
        // A leading zero is a complete integer part on its own.
        pos++;
      }
      else {
        pos++;
        while (pos < len && isDigit(text.charCodeAt(pos))) {
          pos++;
        }
      }
      if (pos < len && text.charCodeAt(pos) === 46 /* CharacterCodes.dot */) {
        pos++;
        if (pos < len && isDigit(text.charCodeAt(pos))) {
          pos++;
          while (pos < len && isDigit(text.charCodeAt(pos))) {
            pos++;
          }
        }
        else {
          scanError = 3 /* ScanError.UnexpectedEndOfNumber */;
          return text.substring(start, pos);
        }
      }
      // `end` only moves past the exponent when the exponent is well-formed.
      let end = pos;
      if (pos < len && (text.charCodeAt(pos) === 69 /* CharacterCodes.E */ || text.charCodeAt(pos) === 101 /* CharacterCodes.e */)) {
        pos++;
        if (pos < len && (text.charCodeAt(pos) === 43 /* CharacterCodes.plus */ || text.charCodeAt(pos) === 45 /* CharacterCodes.minus */)) {
          pos++;
        }
        if (pos < len && isDigit(text.charCodeAt(pos))) {
          pos++;
          while (pos < len && isDigit(text.charCodeAt(pos))) {
            pos++;
          }
          end = pos;
        }
        else {
          scanError = 3 /* ScanError.UnexpectedEndOfNumber */;
        }
      }
      return text.substring(start, end);
    }

    /**
     * Scans a string body; `pos` is just behind the opening quote on entry.
     * Returns the unescaped content. Sets `scanError` for an unterminated string,
     * a bad escape, a bad \u sequence or a raw control character.
     */
    function scanString() {
      let result = '';
      let start = pos; // start of the not yet copied run of plain characters
      while (true) {
        if (pos >= len) {
          result += text.substring(start, pos);
          scanError = 2 /* ScanError.UnexpectedEndOfString */;
          break;
        }
        const ch = text.charCodeAt(pos);
        if (ch === 34 /* CharacterCodes.doubleQuote */) {
          result += text.substring(start, pos);
          pos++;
          break;
        }
        if (ch === 92 /* CharacterCodes.backslash */) {
          result += text.substring(start, pos);
          pos++;
          if (pos >= len) {
            scanError = 2 /* ScanError.UnexpectedEndOfString */;
            break;
          }
          const ch2 = text.charCodeAt(pos++);
          switch (ch2) {
            case 34 /* CharacterCodes.doubleQuote */:
              result += '\"';
              break;
            case 92 /* CharacterCodes.backslash */:
              result += '\\';
              break;
            case 47 /* CharacterCodes.slash */:
              result += '/';
              break;
            case 98 /* CharacterCodes.b */:
              result += '\b';
              break;
            case 102 /* CharacterCodes.f */:
              result += '\f';
              break;
            case 110 /* CharacterCodes.n */:
              result += '\n';
              break;
            case 114 /* CharacterCodes.r */:
              result += '\r';
              break;
            case 116 /* CharacterCodes.t */:
              result += '\t';
              break;
            case 117 /* CharacterCodes.u */: {
              const codeUnit = scanHexDigits(4, true);
              if (codeUnit >= 0) {
                result += String.fromCharCode(codeUnit);
              }
              else {
                scanError = 4 /* ScanError.InvalidUnicode */;
              }
              break;
            }
            default:
              // Unknown escape: flagged, and nothing is appended for it.
              scanError = 5 /* ScanError.InvalidEscapeCharacter */;
          }
          start = pos;
          continue;
        }
        if (ch >= 0 && ch <= 0x1f) {
          if (isLineBreak(ch)) {
            // A raw line break ends the string; the break itself is not consumed.
            result += text.substring(start, pos);
            scanError = 2 /* ScanError.UnexpectedEndOfString */;
            break;
          }
          else {
            scanError = 6 /* ScanError.InvalidCharacter */;
            // mark as error but continue with string
          }
        }
        pos++;
      }
      return result;
    }

    /**
     * Scans the next token (trivia included), updates the token state and returns its kind.
     */
    function scanNext() {
      value = '';
      scanError = 0 /* ScanError.None */;
      tokenOffset = pos;
      lineStartOffset = lineNumber;
      prevTokenLineStartOffset = tokenLineStartOffset;
      if (pos >= len) {
        // at the end
        tokenOffset = len;
        return token = 17 /* SyntaxKind.EOF */;
      }
      let code = text.charCodeAt(pos);
      // trivia: whitespace
      if (isWhiteSpace(code)) {
        do {
          pos++;
          value += String.fromCharCode(code);
          code = text.charCodeAt(pos);
        } while (isWhiteSpace(code));
        return token = 15 /* SyntaxKind.Trivia */;
      }
      // trivia: newlines
      if (isLineBreak(code)) {
        pos++;
        value += String.fromCharCode(code);
        // \r\n counts as a single line break.
        if (code === 13 /* CharacterCodes.carriageReturn */ && text.charCodeAt(pos) === 10 /* CharacterCodes.lineFeed */) {
          pos++;
          value += '\n';
        }
        lineNumber++;
        tokenLineStartOffset = pos;
        return token = 14 /* SyntaxKind.LineBreakTrivia */;
      }
      switch (code) {
        // tokens: []{}:,
        case 123 /* CharacterCodes.openBrace */:
          pos++;
          return token = 1 /* SyntaxKind.OpenBraceToken */;
        case 125 /* CharacterCodes.closeBrace */:
          pos++;
          return token = 2 /* SyntaxKind.CloseBraceToken */;
        case 91 /* CharacterCodes.openBracket */:
          pos++;
          return token = 3 /* SyntaxKind.OpenBracketToken */;
        case 93 /* CharacterCodes.closeBracket */:
          pos++;
          return token = 4 /* SyntaxKind.CloseBracketToken */;
        case 58 /* CharacterCodes.colon */:
          pos++;
          return token = 6 /* SyntaxKind.ColonToken */;
        case 44 /* CharacterCodes.comma */:
          pos++;
          return token = 5 /* SyntaxKind.CommaToken */;
        // strings
        case 34 /* CharacterCodes.doubleQuote */:
          pos++;
          value = scanString();
          return token = 10 /* SyntaxKind.StringLiteral */;
        // comments
        case 47 /* CharacterCodes.slash */: {
          // The comment text starts at the slash itself, i.e. at the token offset,
          // so the value agrees with getTokenOffset()/getTokenLength().
          const start = tokenOffset;
          // Single-line comment
          if (text.charCodeAt(pos + 1) === 47 /* CharacterCodes.slash */) {
            pos += 2;
            // The terminating line break is left for the next token.
            while (pos < len && !isLineBreak(text.charCodeAt(pos))) {
              pos++;
            }
            value = text.substring(start, pos);
            return token = 12 /* SyntaxKind.LineCommentTrivia */;
          }
          // Multi-line comment
          if (text.charCodeAt(pos + 1) === 42 /* CharacterCodes.asterisk */) {
            pos += 2;
            const safeLength = len - 1; // For lookahead.
            let commentClosed = false;
            while (pos < safeLength) {
              const ch = text.charCodeAt(pos);
              if (ch === 42 /* CharacterCodes.asterisk */ && text.charCodeAt(pos + 1) === 47 /* CharacterCodes.slash */) {
                pos += 2;
                commentClosed = true;
                break;
              }
              pos++;
              if (isLineBreak(ch)) {
                if (ch === 13 /* CharacterCodes.carriageReturn */ && text.charCodeAt(pos) === 10 /* CharacterCodes.lineFeed */) {
                  pos++;
                }
                lineNumber++;
                tokenLineStartOffset = pos;
              }
            }
            if (!commentClosed) {
              // Unterminated: the comment swallows the rest of the text. Jump to the end
              // instead of incrementing so the position never runs past the text.
              pos = len;
              scanError = 1 /* ScanError.UnexpectedEndOfComment */;
            }
            value = text.substring(start, pos);
            return token = 13 /* SyntaxKind.BlockCommentTrivia */;
          }
          // just a single slash
          value += String.fromCharCode(code);
          pos++;
          return token = 16 /* SyntaxKind.Unknown */;
        }
        // numbers
        case 45 /* CharacterCodes.minus */:
          value += String.fromCharCode(code);
          pos++;
          if (pos === len || !isDigit(text.charCodeAt(pos))) {
            return token = 16 /* SyntaxKind.Unknown */;
          }
        // found a minus, followed by a number so
        // we fall through to proceed with scanning
        // numbers
        // falls through
        case 48 /* CharacterCodes._0 */:
        case 49 /* CharacterCodes._1 */:
        case 50 /* CharacterCodes._2 */:
        case 51 /* CharacterCodes._3 */:
        case 52 /* CharacterCodes._4 */:
        case 53 /* CharacterCodes._5 */:
        case 54 /* CharacterCodes._6 */:
        case 55 /* CharacterCodes._7 */:
        case 56 /* CharacterCodes._8 */:
        case 57 /* CharacterCodes._9 */:
          value += scanNumber();
          return token = 11 /* SyntaxKind.NumericLiteral */;
        // literals and unknown symbols
        default:
          // is a literal? Read the full word.
          while (pos < len && isUnknownContentCharacter(code)) {
            pos++;
            code = text.charCodeAt(pos);
          }
          if (tokenOffset !== pos) {
            value = text.substring(tokenOffset, pos);
            // keywords: true, false, null
            switch (value) {
              case 'true': return token = 8 /* SyntaxKind.TrueKeyword */;
              case 'false': return token = 9 /* SyntaxKind.FalseKeyword */;
              case 'null': return token = 7 /* SyntaxKind.NullKeyword */;
            }
            return token = 16 /* SyntaxKind.Unknown */;
          }
          // Nothing was consumed: emit the single character as an unknown token.
          value += String.fromCharCode(code);
          pos++;
          return token = 16 /* SyntaxKind.Unknown */;
      }
    }

    /**
     * Tells whether `code` may be part of a bare word, i.e. it is neither
     * whitespace, a line break, nor one of the characters } ] { [ " : , /
     */
    function isUnknownContentCharacter(code) {
      if (isWhiteSpace(code) || isLineBreak(code)) {
        return false;
      }
      switch (code) {
        case 125 /* CharacterCodes.closeBrace */:
        case 93 /* CharacterCodes.closeBracket */:
        case 123 /* CharacterCodes.openBrace */:
        case 91 /* CharacterCodes.openBracket */:
        case 34 /* CharacterCodes.doubleQuote */:
        case 58 /* CharacterCodes.colon */:
        case 44 /* CharacterCodes.comma */:
        case 47 /* CharacterCodes.slash */:
          return false;
      }
      return true;
    }

    /**
     * Scans tokens until one is found that is not trivia (kinds 12..15) and returns its kind.
     */
    function scanNextNonTrivia() {
      let result;
      do {
        result = scanNext();
      } while (result >= 12 /* SyntaxKind.LineCommentTrivia */ && result <= 15 /* SyntaxKind.Trivia */);
      return result;
    }

    return {
      setPosition,
      getPosition: () => pos,
      scan: ignoreTrivia ? scanNextNonTrivia : scanNext,
      getToken: () => token,
      getTokenValue: () => value,
      getTokenOffset: () => tokenOffset,
      getTokenLength: () => pos - tokenOffset,
      getTokenStartLine: () => lineStartOffset,
      getTokenStartCharacter: () => tokenOffset - prevTokenLineStartOffset,
      getTokenError: () => scanError,
    };
  }
  /**
   * Tells whether the given character code is a space or a tab.
   * Line breaks are not treated as whitespace here.
   */
  function isWhiteSpace(ch) {
    switch (ch) {
      case 32 /* CharacterCodes.space */:
      case 9 /* CharacterCodes.tab */:
        return true;
      default:
        return false;
    }
  }
  /**
   * Tells whether the given character code is a line feed or a carriage return.
   */
  function isLineBreak(ch) {
    if (ch === 10 /* CharacterCodes.lineFeed */) {
      return true;
    }
    return ch === 13 /* CharacterCodes.carriageReturn */;
  }
  /**
   * Tells whether the given character code is one of the ASCII digits 0 through 9.
   */
  function isDigit(ch) {
    const zero = 48 /* CharacterCodes._0 */;
    const nine = 57 /* CharacterCodes._9 */;
    // Written as a positive range test so that NaN (charCodeAt past the end) yields false.
    const atLeastZero = ch >= zero;
    return atLeastZero && ch <= nine;
  }
  /**
   * Character code table with a mapping in both directions:
   * CharacterCodes.name yields the code and CharacterCodes[code] yields the name.
   */
  var CharacterCodes;
  (function (codes) {
    // Registers one member: name -> code first, then code -> name.
    const define = (name, charCode) => {
      codes[name] = charCode;
      codes[charCode] = name;
    };
    define('lineFeed', 10);
    define('carriageReturn', 13);
    define('space', 32);
    // _0 .. _9
    for (let digit = 0; digit <= 9; digit++) {
      define(`_${digit}`, 48 + digit);
    }
    // a .. z
    for (let charCode = 97; charCode <= 122; charCode++) {
      define(String.fromCharCode(charCode), charCode);
    }
    // A .. Z
    for (let charCode = 65; charCode <= 90; charCode++) {
      define(String.fromCharCode(charCode), charCode);
    }
    const punctuation = [
      ['asterisk', 42],
      ['backslash', 92],
      ['closeBrace', 125],
      ['closeBracket', 93],
      ['colon', 58],
      ['comma', 44],
      ['dot', 46],
      ['doubleQuote', 34],
      ['minus', 45],
      ['openBrace', 123],
      ['openBracket', 91],
      ['plus', 43],
      ['slash', 47],
      ['formFeed', 12],
      ['tab', 9],
    ];
    for (const [name, charCode] of punctuation) {
      define(name, charCode);
    }
  })(CharacterCodes || (CharacterCodes = {}));