lexer.mjs 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902
  1. import { syntaxError } from '../error/syntaxError.mjs';
  2. import { Token } from './ast.mjs';
  3. import { dedentBlockStringLines } from './blockString.mjs';
  4. import { isDigit, isNameContinue, isNameStart } from './characterClasses.mjs';
  5. import { TokenKind } from './tokenKind.mjs';
  6. /**
  7. * Given a Source object, creates a Lexer for that source.
  8. * A Lexer is a stateful stream generator in that every time
  9. * it is advanced, it returns the next token in the Source. Assuming the
  10. * source lexes, the final Token emitted by the lexer will be of kind
  11. * EOF, after which the lexer will repeatedly return the same EOF token
  12. * whenever called.
  13. */
  14. export class Lexer {
  15. /**
  16. * The previously focused non-ignored token.
  17. */
  18. /**
  19. * The currently focused non-ignored token.
  20. */
  21. /**
  22. * The (1-indexed) line containing the current token.
  23. */
  24. /**
  25. * The character offset at which the current line begins.
  26. */
  27. constructor(source) {
  28. const startOfFileToken = new Token(TokenKind.SOF, 0, 0, 0, 0);
  29. this.source = source;
  30. this.lastToken = startOfFileToken;
  31. this.token = startOfFileToken;
  32. this.line = 1;
  33. this.lineStart = 0;
  34. }
  35. get [Symbol.toStringTag]() {
  36. return 'Lexer';
  37. }
  38. /**
  39. * Advances the token stream to the next non-ignored token.
  40. */
  41. advance() {
  42. this.lastToken = this.token;
  43. const token = (this.token = this.lookahead());
  44. return token;
  45. }
  46. /**
  47. * Looks ahead and returns the next non-ignored token, but does not change
  48. * the state of Lexer.
  49. */
  50. lookahead() {
  51. let token = this.token;
  52. if (token.kind !== TokenKind.EOF) {
  53. do {
  54. if (token.next) {
  55. token = token.next;
  56. } else {
  57. // Read the next token and form a link in the token linked-list.
  58. const nextToken = readNextToken(this, token.end); // @ts-expect-error next is only mutable during parsing.
  59. token.next = nextToken; // @ts-expect-error prev is only mutable during parsing.
  60. nextToken.prev = token;
  61. token = nextToken;
  62. }
  63. } while (token.kind === TokenKind.COMMENT);
  64. }
  65. return token;
  66. }
  67. }
  68. /**
  69. * @internal
  70. */
  71. export function isPunctuatorTokenKind(kind) {
  72. return (
  73. kind === TokenKind.BANG ||
  74. kind === TokenKind.DOLLAR ||
  75. kind === TokenKind.AMP ||
  76. kind === TokenKind.PAREN_L ||
  77. kind === TokenKind.PAREN_R ||
  78. kind === TokenKind.SPREAD ||
  79. kind === TokenKind.COLON ||
  80. kind === TokenKind.EQUALS ||
  81. kind === TokenKind.AT ||
  82. kind === TokenKind.BRACKET_L ||
  83. kind === TokenKind.BRACKET_R ||
  84. kind === TokenKind.BRACE_L ||
  85. kind === TokenKind.PIPE ||
  86. kind === TokenKind.BRACE_R
  87. );
  88. }
  89. /**
  90. * A Unicode scalar value is any Unicode code point except surrogate code
  91. * points. In other words, the inclusive ranges of values 0x0000 to 0xD7FF and
  92. * 0xE000 to 0x10FFFF.
  93. *
  94. * SourceCharacter ::
  95. * - "Any Unicode scalar value"
  96. */
  97. function isUnicodeScalarValue(code) {
  98. return (
  99. (code >= 0x0000 && code <= 0xd7ff) || (code >= 0xe000 && code <= 0x10ffff)
  100. );
  101. }
  102. /**
  103. * The GraphQL specification defines source text as a sequence of unicode scalar
  104. * values (which Unicode defines to exclude surrogate code points). However
  105. * JavaScript defines strings as a sequence of UTF-16 code units which may
  106. * include surrogates. A surrogate pair is a valid source character as it
  107. * encodes a supplementary code point (above U+FFFF), but unpaired surrogate
  108. * code points are not valid source characters.
  109. */
  110. function isSupplementaryCodePoint(body, location) {
  111. return (
  112. isLeadingSurrogate(body.charCodeAt(location)) &&
  113. isTrailingSurrogate(body.charCodeAt(location + 1))
  114. );
  115. }
  116. function isLeadingSurrogate(code) {
  117. return code >= 0xd800 && code <= 0xdbff;
  118. }
  119. function isTrailingSurrogate(code) {
  120. return code >= 0xdc00 && code <= 0xdfff;
  121. }
  122. /**
  123. * Prints the code point (or end of file reference) at a given location in a
  124. * source for use in error messages.
  125. *
  126. * Printable ASCII is printed quoted, while other points are printed in Unicode
  127. * code point form (ie. U+1234).
  128. */
  129. function printCodePointAt(lexer, location) {
  130. const code = lexer.source.body.codePointAt(location);
  131. if (code === undefined) {
  132. return TokenKind.EOF;
  133. } else if (code >= 0x0020 && code <= 0x007e) {
  134. // Printable ASCII
  135. const char = String.fromCodePoint(code);
  136. return char === '"' ? "'\"'" : `"${char}"`;
  137. } // Unicode code point
  138. return 'U+' + code.toString(16).toUpperCase().padStart(4, '0');
  139. }
  140. /**
  141. * Create a token with line and column location information.
  142. */
  143. function createToken(lexer, kind, start, end, value) {
  144. const line = lexer.line;
  145. const col = 1 + start - lexer.lineStart;
  146. return new Token(kind, start, end, line, col, value);
  147. }
  148. /**
  149. * Gets the next token from the source starting at the given position.
  150. *
  151. * This skips over whitespace until it finds the next lexable token, then lexes
  152. * punctuators immediately or calls the appropriate helper function for more
  153. * complicated tokens.
  154. */
  155. function readNextToken(lexer, start) {
  156. const body = lexer.source.body;
  157. const bodyLength = body.length;
  158. let position = start;
  159. while (position < bodyLength) {
  160. const code = body.charCodeAt(position); // SourceCharacter
  161. switch (code) {
  162. // Ignored ::
  163. // - UnicodeBOM
  164. // - WhiteSpace
  165. // - LineTerminator
  166. // - Comment
  167. // - Comma
  168. //
  169. // UnicodeBOM :: "Byte Order Mark (U+FEFF)"
  170. //
  171. // WhiteSpace ::
  172. // - "Horizontal Tab (U+0009)"
  173. // - "Space (U+0020)"
  174. //
  175. // Comma :: ,
  176. case 0xfeff: // <BOM>
  177. case 0x0009: // \t
  178. case 0x0020: // <space>
  179. case 0x002c:
  180. // ,
  181. ++position;
  182. continue;
  183. // LineTerminator ::
  184. // - "New Line (U+000A)"
  185. // - "Carriage Return (U+000D)" [lookahead != "New Line (U+000A)"]
  186. // - "Carriage Return (U+000D)" "New Line (U+000A)"
  187. case 0x000a:
  188. // \n
  189. ++position;
  190. ++lexer.line;
  191. lexer.lineStart = position;
  192. continue;
  193. case 0x000d:
  194. // \r
  195. if (body.charCodeAt(position + 1) === 0x000a) {
  196. position += 2;
  197. } else {
  198. ++position;
  199. }
  200. ++lexer.line;
  201. lexer.lineStart = position;
  202. continue;
  203. // Comment
  204. case 0x0023:
  205. // #
  206. return readComment(lexer, position);
  207. // Token ::
  208. // - Punctuator
  209. // - Name
  210. // - IntValue
  211. // - FloatValue
  212. // - StringValue
  213. //
  214. // Punctuator :: one of ! $ & ( ) ... : = @ [ ] { | }
  215. case 0x0021:
  216. // !
  217. return createToken(lexer, TokenKind.BANG, position, position + 1);
  218. case 0x0024:
  219. // $
  220. return createToken(lexer, TokenKind.DOLLAR, position, position + 1);
  221. case 0x0026:
  222. // &
  223. return createToken(lexer, TokenKind.AMP, position, position + 1);
  224. case 0x0028:
  225. // (
  226. return createToken(lexer, TokenKind.PAREN_L, position, position + 1);
  227. case 0x0029:
  228. // )
  229. return createToken(lexer, TokenKind.PAREN_R, position, position + 1);
  230. case 0x002e:
  231. // .
  232. if (
  233. body.charCodeAt(position + 1) === 0x002e &&
  234. body.charCodeAt(position + 2) === 0x002e
  235. ) {
  236. return createToken(lexer, TokenKind.SPREAD, position, position + 3);
  237. }
  238. break;
  239. case 0x003a:
  240. // :
  241. return createToken(lexer, TokenKind.COLON, position, position + 1);
  242. case 0x003d:
  243. // =
  244. return createToken(lexer, TokenKind.EQUALS, position, position + 1);
  245. case 0x0040:
  246. // @
  247. return createToken(lexer, TokenKind.AT, position, position + 1);
  248. case 0x005b:
  249. // [
  250. return createToken(lexer, TokenKind.BRACKET_L, position, position + 1);
  251. case 0x005d:
  252. // ]
  253. return createToken(lexer, TokenKind.BRACKET_R, position, position + 1);
  254. case 0x007b:
  255. // {
  256. return createToken(lexer, TokenKind.BRACE_L, position, position + 1);
  257. case 0x007c:
  258. // |
  259. return createToken(lexer, TokenKind.PIPE, position, position + 1);
  260. case 0x007d:
  261. // }
  262. return createToken(lexer, TokenKind.BRACE_R, position, position + 1);
  263. // StringValue
  264. case 0x0022:
  265. // "
  266. if (
  267. body.charCodeAt(position + 1) === 0x0022 &&
  268. body.charCodeAt(position + 2) === 0x0022
  269. ) {
  270. return readBlockString(lexer, position);
  271. }
  272. return readString(lexer, position);
  273. } // IntValue | FloatValue (Digit | -)
  274. if (isDigit(code) || code === 0x002d) {
  275. return readNumber(lexer, position, code);
  276. } // Name
  277. if (isNameStart(code)) {
  278. return readName(lexer, position);
  279. }
  280. throw syntaxError(
  281. lexer.source,
  282. position,
  283. code === 0x0027
  284. ? 'Unexpected single quote character (\'), did you mean to use a double quote (")?'
  285. : isUnicodeScalarValue(code) || isSupplementaryCodePoint(body, position)
  286. ? `Unexpected character: ${printCodePointAt(lexer, position)}.`
  287. : `Invalid character: ${printCodePointAt(lexer, position)}.`,
  288. );
  289. }
  290. return createToken(lexer, TokenKind.EOF, bodyLength, bodyLength);
  291. }
  292. /**
  293. * Reads a comment token from the source file.
  294. *
  295. * ```
  296. * Comment :: # CommentChar* [lookahead != CommentChar]
  297. *
  298. * CommentChar :: SourceCharacter but not LineTerminator
  299. * ```
  300. */
  301. function readComment(lexer, start) {
  302. const body = lexer.source.body;
  303. const bodyLength = body.length;
  304. let position = start + 1;
  305. while (position < bodyLength) {
  306. const code = body.charCodeAt(position); // LineTerminator (\n | \r)
  307. if (code === 0x000a || code === 0x000d) {
  308. break;
  309. } // SourceCharacter
  310. if (isUnicodeScalarValue(code)) {
  311. ++position;
  312. } else if (isSupplementaryCodePoint(body, position)) {
  313. position += 2;
  314. } else {
  315. break;
  316. }
  317. }
  318. return createToken(
  319. lexer,
  320. TokenKind.COMMENT,
  321. start,
  322. position,
  323. body.slice(start + 1, position),
  324. );
  325. }
  326. /**
  327. * Reads a number token from the source file, either a FloatValue or an IntValue
  328. * depending on whether a FractionalPart or ExponentPart is encountered.
  329. *
  330. * ```
  331. * IntValue :: IntegerPart [lookahead != {Digit, `.`, NameStart}]
  332. *
  333. * IntegerPart ::
  334. * - NegativeSign? 0
  335. * - NegativeSign? NonZeroDigit Digit*
  336. *
  337. * NegativeSign :: -
  338. *
  339. * NonZeroDigit :: Digit but not `0`
  340. *
  341. * FloatValue ::
  342. * - IntegerPart FractionalPart ExponentPart [lookahead != {Digit, `.`, NameStart}]
  343. * - IntegerPart FractionalPart [lookahead != {Digit, `.`, NameStart}]
  344. * - IntegerPart ExponentPart [lookahead != {Digit, `.`, NameStart}]
  345. *
  346. * FractionalPart :: . Digit+
  347. *
  348. * ExponentPart :: ExponentIndicator Sign? Digit+
  349. *
  350. * ExponentIndicator :: one of `e` `E`
  351. *
  352. * Sign :: one of + -
  353. * ```
  354. */
  355. function readNumber(lexer, start, firstCode) {
  356. const body = lexer.source.body;
  357. let position = start;
  358. let code = firstCode;
  359. let isFloat = false; // NegativeSign (-)
  360. if (code === 0x002d) {
  361. code = body.charCodeAt(++position);
  362. } // Zero (0)
  363. if (code === 0x0030) {
  364. code = body.charCodeAt(++position);
  365. if (isDigit(code)) {
  366. throw syntaxError(
  367. lexer.source,
  368. position,
  369. `Invalid number, unexpected digit after 0: ${printCodePointAt(
  370. lexer,
  371. position,
  372. )}.`,
  373. );
  374. }
  375. } else {
  376. position = readDigits(lexer, position, code);
  377. code = body.charCodeAt(position);
  378. } // Full stop (.)
  379. if (code === 0x002e) {
  380. isFloat = true;
  381. code = body.charCodeAt(++position);
  382. position = readDigits(lexer, position, code);
  383. code = body.charCodeAt(position);
  384. } // E e
  385. if (code === 0x0045 || code === 0x0065) {
  386. isFloat = true;
  387. code = body.charCodeAt(++position); // + -
  388. if (code === 0x002b || code === 0x002d) {
  389. code = body.charCodeAt(++position);
  390. }
  391. position = readDigits(lexer, position, code);
  392. code = body.charCodeAt(position);
  393. } // Numbers cannot be followed by . or NameStart
  394. if (code === 0x002e || isNameStart(code)) {
  395. throw syntaxError(
  396. lexer.source,
  397. position,
  398. `Invalid number, expected digit but got: ${printCodePointAt(
  399. lexer,
  400. position,
  401. )}.`,
  402. );
  403. }
  404. return createToken(
  405. lexer,
  406. isFloat ? TokenKind.FLOAT : TokenKind.INT,
  407. start,
  408. position,
  409. body.slice(start, position),
  410. );
  411. }
  412. /**
  413. * Returns the new position in the source after reading one or more digits.
  414. */
  415. function readDigits(lexer, start, firstCode) {
  416. if (!isDigit(firstCode)) {
  417. throw syntaxError(
  418. lexer.source,
  419. start,
  420. `Invalid number, expected digit but got: ${printCodePointAt(
  421. lexer,
  422. start,
  423. )}.`,
  424. );
  425. }
  426. const body = lexer.source.body;
  427. let position = start + 1; // +1 to skip first firstCode
  428. while (isDigit(body.charCodeAt(position))) {
  429. ++position;
  430. }
  431. return position;
  432. }
  433. /**
  434. * Reads a single-quote string token from the source file.
  435. *
  436. * ```
  437. * StringValue ::
  438. * - `""` [lookahead != `"`]
  439. * - `"` StringCharacter+ `"`
  440. *
  441. * StringCharacter ::
  442. * - SourceCharacter but not `"` or `\` or LineTerminator
  443. * - `\u` EscapedUnicode
  444. * - `\` EscapedCharacter
  445. *
  446. * EscapedUnicode ::
  447. * - `{` HexDigit+ `}`
  448. * - HexDigit HexDigit HexDigit HexDigit
  449. *
  450. * EscapedCharacter :: one of `"` `\` `/` `b` `f` `n` `r` `t`
  451. * ```
  452. */
  453. function readString(lexer, start) {
  454. const body = lexer.source.body;
  455. const bodyLength = body.length;
  456. let position = start + 1;
  457. let chunkStart = position;
  458. let value = '';
  459. while (position < bodyLength) {
  460. const code = body.charCodeAt(position); // Closing Quote (")
  461. if (code === 0x0022) {
  462. value += body.slice(chunkStart, position);
  463. return createToken(lexer, TokenKind.STRING, start, position + 1, value);
  464. } // Escape Sequence (\)
  465. if (code === 0x005c) {
  466. value += body.slice(chunkStart, position);
  467. const escape =
  468. body.charCodeAt(position + 1) === 0x0075 // u
  469. ? body.charCodeAt(position + 2) === 0x007b // {
  470. ? readEscapedUnicodeVariableWidth(lexer, position)
  471. : readEscapedUnicodeFixedWidth(lexer, position)
  472. : readEscapedCharacter(lexer, position);
  473. value += escape.value;
  474. position += escape.size;
  475. chunkStart = position;
  476. continue;
  477. } // LineTerminator (\n | \r)
  478. if (code === 0x000a || code === 0x000d) {
  479. break;
  480. } // SourceCharacter
  481. if (isUnicodeScalarValue(code)) {
  482. ++position;
  483. } else if (isSupplementaryCodePoint(body, position)) {
  484. position += 2;
  485. } else {
  486. throw syntaxError(
  487. lexer.source,
  488. position,
  489. `Invalid character within String: ${printCodePointAt(
  490. lexer,
  491. position,
  492. )}.`,
  493. );
  494. }
  495. }
  496. throw syntaxError(lexer.source, position, 'Unterminated string.');
  497. } // The string value and lexed size of an escape sequence.
  498. function readEscapedUnicodeVariableWidth(lexer, position) {
  499. const body = lexer.source.body;
  500. let point = 0;
  501. let size = 3; // Cannot be larger than 12 chars (\u{00000000}).
  502. while (size < 12) {
  503. const code = body.charCodeAt(position + size++); // Closing Brace (})
  504. if (code === 0x007d) {
  505. // Must be at least 5 chars (\u{0}) and encode a Unicode scalar value.
  506. if (size < 5 || !isUnicodeScalarValue(point)) {
  507. break;
  508. }
  509. return {
  510. value: String.fromCodePoint(point),
  511. size,
  512. };
  513. } // Append this hex digit to the code point.
  514. point = (point << 4) | readHexDigit(code);
  515. if (point < 0) {
  516. break;
  517. }
  518. }
  519. throw syntaxError(
  520. lexer.source,
  521. position,
  522. `Invalid Unicode escape sequence: "${body.slice(
  523. position,
  524. position + size,
  525. )}".`,
  526. );
  527. }
  528. function readEscapedUnicodeFixedWidth(lexer, position) {
  529. const body = lexer.source.body;
  530. const code = read16BitHexCode(body, position + 2);
  531. if (isUnicodeScalarValue(code)) {
  532. return {
  533. value: String.fromCodePoint(code),
  534. size: 6,
  535. };
  536. } // GraphQL allows JSON-style surrogate pair escape sequences, but only when
  537. // a valid pair is formed.
  538. if (isLeadingSurrogate(code)) {
  539. // \u
  540. if (
  541. body.charCodeAt(position + 6) === 0x005c &&
  542. body.charCodeAt(position + 7) === 0x0075
  543. ) {
  544. const trailingCode = read16BitHexCode(body, position + 8);
  545. if (isTrailingSurrogate(trailingCode)) {
  546. // JavaScript defines strings as a sequence of UTF-16 code units and
  547. // encodes Unicode code points above U+FFFF using a surrogate pair of
  548. // code units. Since this is a surrogate pair escape sequence, just
  549. // include both codes into the JavaScript string value. Had JavaScript
  550. // not been internally based on UTF-16, then this surrogate pair would
  551. // be decoded to retrieve the supplementary code point.
  552. return {
  553. value: String.fromCodePoint(code, trailingCode),
  554. size: 12,
  555. };
  556. }
  557. }
  558. }
  559. throw syntaxError(
  560. lexer.source,
  561. position,
  562. `Invalid Unicode escape sequence: "${body.slice(position, position + 6)}".`,
  563. );
  564. }
  565. /**
  566. * Reads four hexadecimal characters and returns the positive integer that 16bit
  567. * hexadecimal string represents. For example, "000f" will return 15, and "dead"
  568. * will return 57005.
  569. *
  570. * Returns a negative number if any char was not a valid hexadecimal digit.
  571. */
  572. function read16BitHexCode(body, position) {
  573. // readHexDigit() returns -1 on error. ORing a negative value with any other
  574. // value always produces a negative value.
  575. return (
  576. (readHexDigit(body.charCodeAt(position)) << 12) |
  577. (readHexDigit(body.charCodeAt(position + 1)) << 8) |
  578. (readHexDigit(body.charCodeAt(position + 2)) << 4) |
  579. readHexDigit(body.charCodeAt(position + 3))
  580. );
  581. }
  582. /**
  583. * Reads a hexadecimal character and returns its positive integer value (0-15).
  584. *
  585. * '0' becomes 0, '9' becomes 9
  586. * 'A' becomes 10, 'F' becomes 15
  587. * 'a' becomes 10, 'f' becomes 15
  588. *
  589. * Returns -1 if the provided character code was not a valid hexadecimal digit.
  590. *
  591. * HexDigit :: one of
  592. * - `0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
  593. * - `A` `B` `C` `D` `E` `F`
  594. * - `a` `b` `c` `d` `e` `f`
  595. */
  596. function readHexDigit(code) {
  597. return code >= 0x0030 && code <= 0x0039 // 0-9
  598. ? code - 0x0030
  599. : code >= 0x0041 && code <= 0x0046 // A-F
  600. ? code - 0x0037
  601. : code >= 0x0061 && code <= 0x0066 // a-f
  602. ? code - 0x0057
  603. : -1;
  604. }
  605. /**
  606. * | Escaped Character | Code Point | Character Name |
  607. * | ----------------- | ---------- | ---------------------------- |
  608. * | `"` | U+0022 | double quote |
  609. * | `\` | U+005C | reverse solidus (back slash) |
  610. * | `/` | U+002F | solidus (forward slash) |
  611. * | `b` | U+0008 | backspace |
  612. * | `f` | U+000C | form feed |
  613. * | `n` | U+000A | line feed (new line) |
  614. * | `r` | U+000D | carriage return |
  615. * | `t` | U+0009 | horizontal tab |
  616. */
  617. function readEscapedCharacter(lexer, position) {
  618. const body = lexer.source.body;
  619. const code = body.charCodeAt(position + 1);
  620. switch (code) {
  621. case 0x0022:
  622. // "
  623. return {
  624. value: '\u0022',
  625. size: 2,
  626. };
  627. case 0x005c:
  628. // \
  629. return {
  630. value: '\u005c',
  631. size: 2,
  632. };
  633. case 0x002f:
  634. // /
  635. return {
  636. value: '\u002f',
  637. size: 2,
  638. };
  639. case 0x0062:
  640. // b
  641. return {
  642. value: '\u0008',
  643. size: 2,
  644. };
  645. case 0x0066:
  646. // f
  647. return {
  648. value: '\u000c',
  649. size: 2,
  650. };
  651. case 0x006e:
  652. // n
  653. return {
  654. value: '\u000a',
  655. size: 2,
  656. };
  657. case 0x0072:
  658. // r
  659. return {
  660. value: '\u000d',
  661. size: 2,
  662. };
  663. case 0x0074:
  664. // t
  665. return {
  666. value: '\u0009',
  667. size: 2,
  668. };
  669. }
  670. throw syntaxError(
  671. lexer.source,
  672. position,
  673. `Invalid character escape sequence: "${body.slice(
  674. position,
  675. position + 2,
  676. )}".`,
  677. );
  678. }
  679. /**
  680. * Reads a block string token from the source file.
  681. *
  682. * ```
  683. * StringValue ::
  684. * - `"""` BlockStringCharacter* `"""`
  685. *
  686. * BlockStringCharacter ::
  687. * - SourceCharacter but not `"""` or `\"""`
  688. * - `\"""`
  689. * ```
  690. */
  691. function readBlockString(lexer, start) {
  692. const body = lexer.source.body;
  693. const bodyLength = body.length;
  694. let lineStart = lexer.lineStart;
  695. let position = start + 3;
  696. let chunkStart = position;
  697. let currentLine = '';
  698. const blockLines = [];
  699. while (position < bodyLength) {
  700. const code = body.charCodeAt(position); // Closing Triple-Quote (""")
  701. if (
  702. code === 0x0022 &&
  703. body.charCodeAt(position + 1) === 0x0022 &&
  704. body.charCodeAt(position + 2) === 0x0022
  705. ) {
  706. currentLine += body.slice(chunkStart, position);
  707. blockLines.push(currentLine);
  708. const token = createToken(
  709. lexer,
  710. TokenKind.BLOCK_STRING,
  711. start,
  712. position + 3, // Return a string of the lines joined with U+000A.
  713. dedentBlockStringLines(blockLines).join('\n'),
  714. );
  715. lexer.line += blockLines.length - 1;
  716. lexer.lineStart = lineStart;
  717. return token;
  718. } // Escaped Triple-Quote (\""")
  719. if (
  720. code === 0x005c &&
  721. body.charCodeAt(position + 1) === 0x0022 &&
  722. body.charCodeAt(position + 2) === 0x0022 &&
  723. body.charCodeAt(position + 3) === 0x0022
  724. ) {
  725. currentLine += body.slice(chunkStart, position);
  726. chunkStart = position + 1; // skip only slash
  727. position += 4;
  728. continue;
  729. } // LineTerminator
  730. if (code === 0x000a || code === 0x000d) {
  731. currentLine += body.slice(chunkStart, position);
  732. blockLines.push(currentLine);
  733. if (code === 0x000d && body.charCodeAt(position + 1) === 0x000a) {
  734. position += 2;
  735. } else {
  736. ++position;
  737. }
  738. currentLine = '';
  739. chunkStart = position;
  740. lineStart = position;
  741. continue;
  742. } // SourceCharacter
  743. if (isUnicodeScalarValue(code)) {
  744. ++position;
  745. } else if (isSupplementaryCodePoint(body, position)) {
  746. position += 2;
  747. } else {
  748. throw syntaxError(
  749. lexer.source,
  750. position,
  751. `Invalid character within String: ${printCodePointAt(
  752. lexer,
  753. position,
  754. )}.`,
  755. );
  756. }
  757. }
  758. throw syntaxError(lexer.source, position, 'Unterminated string.');
  759. }
  760. /**
  761. * Reads an alphanumeric + underscore name from the source.
  762. *
  763. * ```
  764. * Name ::
  765. * - NameStart NameContinue* [lookahead != NameContinue]
  766. * ```
  767. */
  768. function readName(lexer, start) {
  769. const body = lexer.source.body;
  770. const bodyLength = body.length;
  771. let position = start + 1;
  772. while (position < bodyLength) {
  773. const code = body.charCodeAt(position);
  774. if (isNameContinue(code)) {
  775. ++position;
  776. } else {
  777. break;
  778. }
  779. }
  780. return createToken(
  781. lexer,
  782. TokenKind.NAME,
  783. start,
  784. position,
  785. body.slice(start, position),
  786. );
  787. }