lexer.js 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006
  1. 'use strict';
  2. Object.defineProperty(exports, '__esModule', {
  3. value: true,
  4. });
  5. exports.Lexer = void 0;
  6. exports.isPunctuatorTokenKind = isPunctuatorTokenKind;
  7. var _syntaxError = require('../error/syntaxError.js');
  8. var _ast = require('./ast.js');
  9. var _blockString = require('./blockString.js');
  10. var _characterClasses = require('./characterClasses.js');
  11. var _tokenKind = require('./tokenKind.js');
  /**
   * A stateful token stream over a Source object.
   *
   * Every call to `advance()` moves the lexer to the next non-ignored token of
   * the source. When the source lexes, the last token produced has kind EOF;
   * from then on the lexer keeps returning that same EOF token.
   */
  class Lexer {
    /**
     * Creates a lexer focused on a synthetic start-of-file token.
     *
     * Instance fields initialised here:
     * - `source`: the Source being lexed.
     * - `lastToken`: the previously focused non-ignored token.
     * - `token`: the currently focused non-ignored token.
     * - `line`: the (1-indexed) line containing the current token.
     * - `lineStart`: the character offset at which the current line begins.
     */
    constructor(source) {
      const sof = new _ast.Token(_tokenKind.TokenKind.SOF, 0, 0, 0, 0);
      this.source = source;
      this.lastToken = sof;
      this.token = sof;
      this.line = 1;
      this.lineStart = 0;
    }

    get [Symbol.toStringTag]() {
      return 'Lexer';
    }

    /**
     * Advances the token stream to the next non-ignored token and returns it.
     * The token that was focused before the call becomes `lastToken`.
     */
    advance() {
      this.lastToken = this.token;
      const upcoming = this.lookahead();
      this.token = upcoming;
      return upcoming;
    }

    /**
     * Returns the next non-ignored token without changing which token is
     * focused (`token` and `lastToken` are left alone).
     *
     * Tokens that have not been read yet are lexed on demand and linked into
     * the token list through `next`/`prev`. COMMENT tokens are kept in that
     * list but are skipped when choosing the token to return.
     */
    lookahead() {
      let cursor = this.token;
      // Once EOF is focused there is nothing further to read.
      if (cursor.kind === _tokenKind.TokenKind.EOF) {
        return cursor;
      }
      do {
        let following = cursor.next;
        if (!following) {
          // Read the next token and form a link in the token linked-list.
          following = readNextToken(this, cursor.end);
          cursor.next = following;
          following.prev = cursor;
        }
        cursor = following;
      } while (cursor.kind === _tokenKind.TokenKind.COMMENT);
      return cursor;
    }
  }
  /**
   * @internal
   */
  // NOTE(review): this `@internal` tag sits directly above the `Lexer` export,
  // but nothing else in this file treats Lexer as internal. The tag may have
  // been meant for the function declared right after this statement - confirm
  // against the untranspiled source.
  // Publishes the Lexer class on the CommonJS `exports` object, replacing the
  // `void 0` placeholder assigned near the top of the file.
  exports.Lexer = Lexer;
  /**
   * Reports whether `kind` is one of the punctuator token kinds:
   * ! $ & ( ) ... : = @ [ ] { | }
   *
   * @param kind - token kind, compared by strict equality with TokenKind members.
   * @returns `true` for a punctuator kind, `false` for anything else.
   */
  function isPunctuatorTokenKind(kind) {
    const { TokenKind } = _tokenKind;
    switch (kind) {
      case TokenKind.BANG:
      case TokenKind.DOLLAR:
      case TokenKind.AMP:
      case TokenKind.PAREN_L:
      case TokenKind.PAREN_R:
      case TokenKind.SPREAD:
      case TokenKind.COLON:
      case TokenKind.EQUALS:
      case TokenKind.AT:
      case TokenKind.BRACKET_L:
      case TokenKind.BRACKET_R:
      case TokenKind.BRACE_L:
      case TokenKind.PIPE:
      case TokenKind.BRACE_R:
        return true;
      default:
        return false;
    }
  }
  /**
   * Tells whether `code` is a Unicode scalar value, i.e. any Unicode code point
   * that is not a surrogate code point: the inclusive ranges 0x0000 to 0xD7FF
   * and 0xE000 to 0x10FFFF.
   *
   * SourceCharacter ::
   * - "Any Unicode scalar value"
   */
  function isUnicodeScalarValue(code) {
    // Everything below the surrogate block.
    if (code >= 0x0000 && code <= 0xd7ff) {
      return true;
    }
    // Everything above the surrogate block, up to the last code point.
    return code >= 0xe000 && code <= 0x10ffff;
  }
  /**
   * Tells whether the two UTF-16 code units starting at `location` form a
   * surrogate pair.
   *
   * The GraphQL specification defines source text as a sequence of Unicode
   * scalar values, which exclude surrogate code points. JavaScript strings are
   * sequences of UTF-16 code units and may contain surrogates. A surrogate pair
   * is a valid source character because it encodes a supplementary code point
   * (above U+FFFF); an unpaired surrogate is not a valid source character.
   */
  function isSupplementaryCodePoint(body, location) {
    const leadUnit = body.charCodeAt(location);
    if (!isLeadingSurrogate(leadUnit)) {
      return false;
    }
    const trailUnit = body.charCodeAt(location + 1);
    return isTrailingSurrogate(trailUnit);
  }
  /**
   * Tells whether `code` lies in the leading (high) surrogate range
   * 0xD800 to 0xDBFF, inclusive.
   */
  function isLeadingSurrogate(code) {
    const FIRST_LEADING = 0xd800;
    const LAST_LEADING = 0xdbff;
    return FIRST_LEADING <= code && code <= LAST_LEADING;
  }
  /**
   * Tells whether `code` lies in the trailing (low) surrogate range
   * 0xDC00 to 0xDFFF, inclusive.
   */
  function isTrailingSurrogate(code) {
    const FIRST_TRAILING = 0xdc00;
    const LAST_TRAILING = 0xdfff;
    return FIRST_TRAILING <= code && code <= LAST_TRAILING;
  }
  /**
   * Describes the code point at `location` in the lexer's source for use in
   * error messages.
   *
   * - Past the end of the source: the EOF token kind.
   * - Printable ASCII (U+0020 to U+007E): the character in double quotes,
   *   except the double quote itself, which is wrapped in single quotes.
   * - Anything else: Unicode code point form (e.g. U+1234), upper-case hex
   *   padded to at least four digits.
   */
  function printCodePointAt(lexer, location) {
    const codePoint = lexer.source.body.codePointAt(location);
    if (codePoint === undefined) {
      return _tokenKind.TokenKind.EOF;
    }
    const isPrintableAscii = codePoint >= 0x0020 && codePoint <= 0x007e;
    if (!isPrintableAscii) {
      // Unicode code point
      const hex = codePoint.toString(16).toUpperCase();
      return 'U+' + hex.padStart(4, '0');
    }
    if (codePoint === 0x0022) {
      // A double quote is shown inside single quotes so it stays readable.
      return "'\"'";
    }
    return `"${String.fromCodePoint(codePoint)}"`;
  }
  /**
   * Builds a Token of the given kind spanning `start` to `end`, stamped with
   * the lexer's current line and a 1-indexed column computed from the offset
   * at which the current line begins.
   */
  function createToken(lexer, kind, start, end, value) {
    const { line, lineStart } = lexer;
    const column = 1 + start - lineStart;
    return new _ast.Token(kind, start, end, line, column, value);
  }
  /**
   * Reads the next token from the source, starting the scan at `start`.
   *
   * Ignored characters (BOM, whitespace, line terminators, commas) are skipped;
   * each line terminator also bumps `lexer.line` and moves `lexer.lineStart`.
   * Punctuators are lexed right here, while comments, strings, numbers and
   * names are handed to their dedicated readers. When the end of the source is
   * reached an EOF token positioned at the end of the body is returned.
   *
   * Throws a syntax error for a character that cannot start any token.
   */
  function readNextToken(lexer, start) {
    const { body } = lexer.source;
    const bodyLength = body.length;
    const { TokenKind } = _tokenKind;
    // Emits a fixed-width punctuator token beginning at `at`.
    const punctuator = (kind, at, width = 1) =>
      createToken(lexer, kind, at, at + width);

    for (let position = start; position < bodyLength; ) {
      const code = body.charCodeAt(position); // SourceCharacter
      switch (code) {
        // Ignored ::
        //   - UnicodeBOM
        //   - WhiteSpace
        //   - LineTerminator
        //   - Comment
        //   - Comma
        //
        // UnicodeBOM :: "Byte Order Mark (U+FEFF)"
        //
        // WhiteSpace ::
        //   - "Horizontal Tab (U+0009)"
        //   - "Space (U+0020)"
        //
        // Comma :: ,
        case 0xfeff: // <BOM>
        case 0x0009: // \t
        case 0x0020: // <space>
        case 0x002c: // ,
          ++position;
          continue;

        // LineTerminator ::
        //   - "New Line (U+000A)"
        //   - "Carriage Return (U+000D)" [lookahead != "New Line (U+000A)"]
        //   - "Carriage Return (U+000D)" "New Line (U+000A)"
        case 0x000a: // \n
          ++position;
          ++lexer.line;
          lexer.lineStart = position;
          continue;
        case 0x000d: // \r
          // A \r\n pair counts as a single line terminator.
          position += body.charCodeAt(position + 1) === 0x000a ? 2 : 1;
          ++lexer.line;
          lexer.lineStart = position;
          continue;

        // Comment
        case 0x0023: // #
          return readComment(lexer, position);

        // Token ::
        //   - Punctuator
        //   - Name
        //   - IntValue
        //   - FloatValue
        //   - StringValue
        //
        // Punctuator :: one of ! $ & ( ) ... : = @ [ ] { | }
        case 0x0021: // !
          return punctuator(TokenKind.BANG, position);
        case 0x0024: // $
          return punctuator(TokenKind.DOLLAR, position);
        case 0x0026: // &
          return punctuator(TokenKind.AMP, position);
        case 0x0028: // (
          return punctuator(TokenKind.PAREN_L, position);
        case 0x0029: // )
          return punctuator(TokenKind.PAREN_R, position);
        case 0x002e: // .
          if (
            body.charCodeAt(position + 1) === 0x002e &&
            body.charCodeAt(position + 2) === 0x002e
          ) {
            return punctuator(TokenKind.SPREAD, position, 3);
          }
          // Anything other than three dots falls through to the error below.
          break;
        case 0x003a: // :
          return punctuator(TokenKind.COLON, position);
        case 0x003d: // =
          return punctuator(TokenKind.EQUALS, position);
        case 0x0040: // @
          return punctuator(TokenKind.AT, position);
        case 0x005b: // [
          return punctuator(TokenKind.BRACKET_L, position);
        case 0x005d: // ]
          return punctuator(TokenKind.BRACKET_R, position);
        case 0x007b: // {
          return punctuator(TokenKind.BRACE_L, position);
        case 0x007c: // |
          return punctuator(TokenKind.PIPE, position);
        case 0x007d: // }
          return punctuator(TokenKind.BRACE_R, position);

        // StringValue
        case 0x0022: {
          // "
          const opensBlockString =
            body.charCodeAt(position + 1) === 0x0022 &&
            body.charCodeAt(position + 2) === 0x0022;
          return opensBlockString
            ? readBlockString(lexer, position)
            : readString(lexer, position);
        }
      }

      const { isDigit, isNameStart } = _characterClasses;
      // IntValue | FloatValue (Digit | -)
      if (isDigit(code) || code === 0x002d) {
        return readNumber(lexer, position, code);
      }
      // Name
      if (isNameStart(code)) {
        return readName(lexer, position);
      }

      // Nothing can start a token here; pick the most helpful message.
      let message;
      if (code === 0x0027) {
        message =
          'Unexpected single quote character (\'), did you mean to use a double quote (")?';
      } else if (
        isUnicodeScalarValue(code) ||
        isSupplementaryCodePoint(body, position)
      ) {
        message = `Unexpected character: ${printCodePointAt(lexer, position)}.`;
      } else {
        message = `Invalid character: ${printCodePointAt(lexer, position)}.`;
      }
      throw (0, _syntaxError.syntaxError)(lexer.source, position, message);
    }

    return createToken(lexer, TokenKind.EOF, bodyLength, bodyLength);
  }
  /**
   * Reads a comment token that begins with the `#` at `start`.
   *
   * The comment runs until a line terminator, the end of the source, or a code
   * unit that is not a valid source character (an unpaired surrogate). The
   * token value is the comment text without the leading `#`.
   *
   * ```
   * Comment :: # CommentChar* [lookahead != CommentChar]
   *
   * CommentChar :: SourceCharacter but not LineTerminator
   * ```
   */
  function readComment(lexer, start) {
    const { body } = lexer.source;
    const limit = body.length;
    let cursor = start + 1;
    while (cursor < limit) {
      const unit = body.charCodeAt(cursor);
      // LineTerminator (\n | \r)
      const isLineEnd = unit === 0x000a || unit === 0x000d;
      if (isLineEnd) {
        break;
      }
      // SourceCharacter: one code unit, or two for a surrogate pair.
      let width = 0;
      if (isUnicodeScalarValue(unit)) {
        width = 1;
      } else if (isSupplementaryCodePoint(body, cursor)) {
        width = 2;
      }
      if (width === 0) {
        break;
      }
      cursor += width;
    }
    const text = body.slice(start + 1, cursor);
    return createToken(lexer, _tokenKind.TokenKind.COMMENT, start, cursor, text);
  }
  /**
   * Reads a number token beginning at `start`, whose first code unit is
   * `firstCode`. The token is a FloatValue when a FractionalPart or an
   * ExponentPart is encountered and an IntValue otherwise; its value is the
   * raw source text of the number.
   *
   * Throws a syntax error for a digit directly after a leading zero, for a
   * missing digit where one is required, and for a number that is immediately
   * followed by `.` or a NameStart character.
   *
   * ```
   * IntValue :: IntegerPart [lookahead != {Digit, `.`, NameStart}]
   *
   * IntegerPart ::
   *   - NegativeSign? 0
   *   - NegativeSign? NonZeroDigit Digit*
   *
   * NegativeSign :: -
   *
   * NonZeroDigit :: Digit but not `0`
   *
   * FloatValue ::
   *   - IntegerPart FractionalPart ExponentPart [lookahead != {Digit, `.`, NameStart}]
   *   - IntegerPart FractionalPart [lookahead != {Digit, `.`, NameStart}]
   *   - IntegerPart ExponentPart [lookahead != {Digit, `.`, NameStart}]
   *
   * FractionalPart :: . Digit+
   *
   * ExponentPart :: ExponentIndicator Sign? Digit+
   *
   * ExponentIndicator :: one of `e` `E`
   *
   * Sign :: one of + -
   * ```
   */
  function readNumber(lexer, start, firstCode) {
    const { body } = lexer.source;
    const { isDigit, isNameStart } = _characterClasses;
    const { syntaxError } = _syntaxError;
    let cursor = start;
    let unit = firstCode;
    let sawFractionOrExponent = false;

    // Moves one code unit forward and reloads the current code unit.
    const step = () => {
      cursor += 1;
      unit = body.charCodeAt(cursor);
    };
    // Consumes a mandatory run of digits and reloads the current code unit.
    const takeDigits = () => {
      cursor = readDigits(lexer, cursor, unit);
      unit = body.charCodeAt(cursor);
    };

    // NegativeSign (-)
    if (unit === 0x002d) {
      step();
    }

    if (unit !== 0x0030) {
      takeDigits();
    } else {
      // Zero (0) must stand alone in the integer part.
      step();
      if (isDigit(unit)) {
        throw syntaxError(
          lexer.source,
          cursor,
          `Invalid number, unexpected digit after 0: ${printCodePointAt(lexer, cursor)}.`,
        );
      }
    }

    // Full stop (.)
    if (unit === 0x002e) {
      sawFractionOrExponent = true;
      step();
      takeDigits();
    }

    // E e
    if (unit === 0x0045 || unit === 0x0065) {
      sawFractionOrExponent = true;
      step();
      // + -
      if (unit === 0x002b || unit === 0x002d) {
        step();
      }
      takeDigits();
    }

    // Numbers cannot be followed by . or NameStart
    if (unit === 0x002e || isNameStart(unit)) {
      throw syntaxError(
        lexer.source,
        cursor,
        `Invalid number, expected digit but got: ${printCodePointAt(lexer, cursor)}.`,
      );
    }

    const kind = sawFractionOrExponent
      ? _tokenKind.TokenKind.FLOAT
      : _tokenKind.TokenKind.INT;
    return createToken(lexer, kind, start, cursor, body.slice(start, cursor));
  }
  /**
   * Consumes one or more digits beginning at `start` and returns the position
   * just past the last digit.
   *
   * `firstCode` is the code unit at `start`; when it is not a digit a syntax
   * error is thrown, because at least one digit is required.
   */
  function readDigits(lexer, start, firstCode) {
    const { isDigit } = _characterClasses;
    if (isDigit(firstCode)) {
      const { body } = lexer.source;
      // The first digit was already checked through firstCode.
      let end = start + 1;
      while (isDigit(body.charCodeAt(end))) {
        end += 1;
      }
      return end;
    }
    throw (0, _syntaxError.syntaxError)(
      lexer.source,
      start,
      `Invalid number, expected digit but got: ${printCodePointAt(lexer, start)}.`,
    );
  }
  /**
   * Reads a string token delimited by a single pair of double quotes (as
   * opposed to a triple-quoted block string), beginning at the opening quote
   * at `start`. The token value is the string content with escape sequences
   * decoded.
   *
   * Throws a syntax error when the string contains an invalid character or
   * escape sequence, or when a line terminator or the end of the source is
   * reached before the closing quote.
   *
   * ```
   * StringValue ::
   *   - `""` [lookahead != `"`]
   *   - `"` StringCharacter+ `"`
   *
   * StringCharacter ::
   *   - SourceCharacter but not `"` or `\` or LineTerminator
   *   - `\u` EscapedUnicode
   *   - `\` EscapedCharacter
   *
   * EscapedUnicode ::
   *   - `{` HexDigit+ `}`
   *   - HexDigit HexDigit HexDigit HexDigit
   *
   * EscapedCharacter :: one of `"` `\` `/` `b` `f` `n` `r` `t`
   * ```
   */
  function readString(lexer, start) {
    const { body } = lexer.source;
    const limit = body.length;
    let cursor = start + 1;
    // Start of the literal run that has not been copied into `decoded` yet.
    let pendingFrom = cursor;
    let decoded = '';

    while (cursor < limit) {
      const unit = body.charCodeAt(cursor);

      // Closing Quote (")
      if (unit === 0x0022) {
        decoded += body.slice(pendingFrom, cursor);
        return createToken(
          lexer,
          _tokenKind.TokenKind.STRING,
          start,
          cursor + 1,
          decoded,
        );
      }

      // Escape Sequence (\)
      if (unit === 0x005c) {
        decoded += body.slice(pendingFrom, cursor);
        let escape;
        if (body.charCodeAt(cursor + 1) !== 0x0075) {
          // Not \u: a single-character escape.
          escape = readEscapedCharacter(lexer, cursor);
        } else if (body.charCodeAt(cursor + 2) === 0x007b) {
          // \u{
          escape = readEscapedUnicodeVariableWidth(lexer, cursor);
        } else {
          escape = readEscapedUnicodeFixedWidth(lexer, cursor);
        }
        decoded += escape.value;
        cursor += escape.size;
        pendingFrom = cursor;
        continue;
      }

      // LineTerminator (\n | \r)
      if (unit === 0x000a || unit === 0x000d) {
        break;
      }

      // SourceCharacter
      if (isUnicodeScalarValue(unit)) {
        cursor += 1;
      } else if (isSupplementaryCodePoint(body, cursor)) {
        cursor += 2;
      } else {
        throw (0, _syntaxError.syntaxError)(
          lexer.source,
          cursor,
          `Invalid character within String: ${printCodePointAt(lexer, cursor)}.`,
        );
      }
    }

    throw (0, _syntaxError.syntaxError)(
      lexer.source,
      cursor,
      'Unterminated string.',
    );
  }
  // The string value and lexed size of an escape sequence.
  /**
   * Decodes a variable-width Unicode escape (`\u{` HexDigit+ `}`) whose
   * backslash is at `position`.
   *
   * Returns `{ value, size }`: the decoded character and the number of source
   * characters the escape occupies. Throws a syntax error when the braces are
   * empty, a non-hex character appears, the escape is longer than 12
   * characters, or the digits do not encode a Unicode scalar value.
   */
  function readEscapedUnicodeVariableWidth(lexer, position) {
    const { body } = lexer.source;
    let codePoint = 0;
    // Characters consumed so far; the leading `\u{` accounts for the first 3.
    let consumed = 3;
    // Cannot be larger than 12 chars (\u{00000000}).
    while (consumed < 12) {
      const unit = body.charCodeAt(position + consumed);
      consumed += 1;
      // Closing Brace (})
      if (unit === 0x007d) {
        // Must be at least 5 chars (\u{0}) and encode a Unicode scalar value.
        const isValid = consumed >= 5 && isUnicodeScalarValue(codePoint);
        if (isValid) {
          return {
            value: String.fromCodePoint(codePoint),
            size: consumed,
          };
        }
        break;
      }
      // Append this hex digit to the code point; a non-hex character (or an
      // overflow into the sign bit) leaves the accumulator negative.
      codePoint = (codePoint << 4) | readHexDigit(unit);
      if (codePoint < 0) {
        break;
      }
    }
    throw (0, _syntaxError.syntaxError)(
      lexer.source,
      position,
      `Invalid Unicode escape sequence: "${body.slice(position, position + consumed)}".`,
    );
  }
  /**
   * Decodes a fixed-width Unicode escape (`\u` followed by four hex digits)
   * whose backslash is at `position`.
   *
   * Returns `{ value, size }`. A single escape that encodes a Unicode scalar
   * value has size 6. A leading-surrogate escape directly followed by a
   * trailing-surrogate escape is accepted as one pair of size 12. Anything
   * else throws a syntax error that quotes the first six characters.
   */
  function readEscapedUnicodeFixedWidth(lexer, position) {
    const { body } = lexer.source;
    const firstUnit = read16BitHexCode(body, position + 2);
    if (isUnicodeScalarValue(firstUnit)) {
      return {
        value: String.fromCodePoint(firstUnit),
        size: 6,
      };
    }

    // GraphQL allows JSON-style surrogate pair escape sequences, but only when
    // a valid pair is formed.
    const secondEscapeFollows =
      isLeadingSurrogate(firstUnit) &&
      body.charCodeAt(position + 6) === 0x005c && // \
      body.charCodeAt(position + 7) === 0x0075; // u
    if (secondEscapeFollows) {
      const secondUnit = read16BitHexCode(body, position + 8);
      if (isTrailingSurrogate(secondUnit)) {
        // JavaScript defines strings as a sequence of UTF-16 code units and
        // encodes Unicode code points above U+FFFF using a surrogate pair of
        // code units. Since this is a surrogate pair escape sequence, just
        // include both codes into the JavaScript string value. Had JavaScript
        // not been internally based on UTF-16, then this surrogate pair would
        // be decoded to retrieve the supplementary code point.
        return {
          value: String.fromCodePoint(firstUnit, secondUnit),
          size: 12,
        };
      }
    }

    throw (0, _syntaxError.syntaxError)(
      lexer.source,
      position,
      `Invalid Unicode escape sequence: "${body.slice(position, position + 6)}".`,
    );
  }
  /**
   * Reads the four hexadecimal characters starting at `position` and returns
   * the non-negative integer that 16-bit hex string represents. For example,
   * "000f" yields 15 and "dead" yields 57005.
   *
   * Returns a negative number if any of the four characters is not a valid
   * hexadecimal digit.
   */
  function read16BitHexCode(body, position) {
    // readHexDigit() returns -1 on error. Once the accumulator has gone
    // negative, shifting it left and ORing further digits in keeps it negative.
    let value = 0;
    for (let offset = 0; offset < 4; offset += 1) {
      value = (value << 4) | readHexDigit(body.charCodeAt(position + offset));
    }
    return value;
  }
  /**
   * Converts the character code of a hexadecimal digit into its integer value
   * (0-15).
   *
   * '0' becomes 0, '9' becomes 9
   * 'A' becomes 10, 'F' becomes 15
   * 'a' becomes 10, 'f' becomes 15
   *
   * Returns -1 if the provided character code is not a valid hexadecimal digit.
   *
   * HexDigit :: one of
   *   - `0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
   *   - `A` `B` `C` `D` `E` `F`
   *   - `a` `b` `c` `d` `e` `f`
   */
  function readHexDigit(code) {
    // 0-9
    if (code >= 0x0030 && code <= 0x0039) {
      return code - 0x0030;
    }
    // A-F
    if (code >= 0x0041 && code <= 0x0046) {
      return code - 0x0037;
    }
    // a-f
    if (code >= 0x0061 && code <= 0x0066) {
      return code - 0x0057;
    }
    return -1;
  }
  /**
   * Decodes a single-character escape sequence whose backslash is at
   * `position`. Returns `{ value, size }` with `size` always 2; throws a
   * syntax error when the character after the backslash is not listed below.
   *
   * | Escaped Character | Code Point | Character Name               |
   * | ----------------- | ---------- | ---------------------------- |
   * | `"`               | U+0022     | double quote                 |
   * | `\`               | U+005C     | reverse solidus (back slash) |
   * | `/`               | U+002F     | solidus (forward slash)      |
   * | `b`               | U+0008     | backspace                    |
   * | `f`               | U+000C     | form feed                    |
   * | `n`               | U+000A     | line feed (new line)         |
   * | `r`               | U+000D     | carriage return              |
   * | `t`               | U+0009     | horizontal tab               |
   */
  function readEscapedCharacter(lexer, position) {
    const { body } = lexer.source;
    let unescaped;
    switch (body.charCodeAt(position + 1)) {
      case 0x0022: // "
        unescaped = '\u0022';
        break;
      case 0x005c: // \
        unescaped = '\u005c';
        break;
      case 0x002f: // /
        unescaped = '\u002f';
        break;
      case 0x0062: // b
        unescaped = '\u0008';
        break;
      case 0x0066: // f
        unescaped = '\u000c';
        break;
      case 0x006e: // n
        unescaped = '\u000a';
        break;
      case 0x0072: // r
        unescaped = '\u000d';
        break;
      case 0x0074: // t
        unescaped = '\u0009';
        break;
      default:
        throw (0, _syntaxError.syntaxError)(
          lexer.source,
          position,
          `Invalid character escape sequence: "${body.slice(position, position + 2)}".`,
        );
    }
    return {
      value: unescaped,
      size: 2,
    };
  }
  /**
   * Reads a block string token beginning at the opening `"""` at `start`.
   *
   * The raw content is split into lines at each line terminator, `\"""` is
   * taken as a literal `"""`, and the lines are passed through
   * `dedentBlockStringLines` and joined with U+000A to form the token value.
   * The token carries the line/column of the opening quotes; afterwards
   * `lexer.line` and `lexer.lineStart` are moved to the line on which the
   * string ends.
   *
   * Throws a syntax error on an invalid character or when the source ends
   * before the closing `"""`.
   *
   * ```
   * StringValue ::
   *   - `"""` BlockStringCharacter* `"""`
   *
   * BlockStringCharacter ::
   *   - SourceCharacter but not `"""` or `\"""`
   *   - `\"""`
   * ```
   */
  function readBlockString(lexer, start) {
    const { body } = lexer.source;
    const limit = body.length;
    const QUOTE = 0x0022;
    // True when three consecutive double quotes begin at `at`.
    const tripleQuoteAt = (at) =>
      body.charCodeAt(at) === QUOTE &&
      body.charCodeAt(at + 1) === QUOTE &&
      body.charCodeAt(at + 2) === QUOTE;

    const lines = [];
    let pendingLine = '';
    let lastLineStart = lexer.lineStart;
    let cursor = start + 3;
    // Start of the raw run not yet copied into `pendingLine`.
    let pendingFrom = cursor;

    while (cursor < limit) {
      const unit = body.charCodeAt(cursor);

      // Closing Triple-Quote (""")
      if (tripleQuoteAt(cursor)) {
        lines.push(pendingLine + body.slice(pendingFrom, cursor));
        const { dedentBlockStringLines } = _blockString;
        // Return a string of the lines joined with U+000A.
        const value = dedentBlockStringLines(lines).join('\n');
        // The token is created before the lexer's line bookkeeping is updated
        // so that it records where the block string starts.
        const token = createToken(
          lexer,
          _tokenKind.TokenKind.BLOCK_STRING,
          start,
          cursor + 3,
          value,
        );
        lexer.line += lines.length - 1;
        lexer.lineStart = lastLineStart;
        return token;
      }

      // Escaped Triple-Quote (\""")
      if (unit === 0x005c && tripleQuoteAt(cursor + 1)) {
        pendingLine += body.slice(pendingFrom, cursor);
        pendingFrom = cursor + 1; // skip only slash
        cursor += 4;
        continue;
      }

      // LineTerminator
      if (unit === 0x000a || unit === 0x000d) {
        lines.push(pendingLine + body.slice(pendingFrom, cursor));
        const isCrLf = unit === 0x000d && body.charCodeAt(cursor + 1) === 0x000a;
        cursor += isCrLf ? 2 : 1;
        pendingLine = '';
        pendingFrom = cursor;
        lastLineStart = cursor;
        continue;
      }

      // SourceCharacter
      if (isUnicodeScalarValue(unit)) {
        cursor += 1;
      } else if (isSupplementaryCodePoint(body, cursor)) {
        cursor += 2;
      } else {
        throw (0, _syntaxError.syntaxError)(
          lexer.source,
          cursor,
          `Invalid character within String: ${printCodePointAt(lexer, cursor)}.`,
        );
      }
    }

    throw (0, _syntaxError.syntaxError)(
      lexer.source,
      cursor,
      'Unterminated string.',
    );
  }
  /**
   * Reads a name token beginning at `start`. The character at `start` is taken
   * as the already-verified NameStart; the name then extends over every
   * following NameContinue character. The token value is the name's text.
   *
   * ```
   * Name ::
   *   - NameStart NameContinue* [lookahead != NameContinue]
   * ```
   */
  function readName(lexer, start) {
    const { body } = lexer.source;
    const limit = body.length;
    const { isNameContinue } = _characterClasses;
    let end = start + 1;
    while (end < limit && isNameContinue(body.charCodeAt(end))) {
      end += 1;
    }
    return createToken(
      lexer,
      _tokenKind.TokenKind.NAME,
      start,
      end,
      body.slice(start, end),
    );
  }