12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006 |
- 'use strict';
- Object.defineProperty(exports, '__esModule', {
- value: true,
- });
- exports.Lexer = void 0;
- exports.isPunctuatorTokenKind = isPunctuatorTokenKind;
- var _syntaxError = require('../error/syntaxError.js');
- var _ast = require('./ast.js');
- var _blockString = require('./blockString.js');
- var _characterClasses = require('./characterClasses.js');
- var _tokenKind = require('./tokenKind.js');
/**
 * Tokenizer state for a single source.
 *
 * Tracks the source being read, the current token (`token`), the token that
 * was current before the last `advance()` (`lastToken`), and the line
 * bookkeeping (`line`, `lineStart`) that the token readers consult and
 * update. Tokens are chained to one another through `prev` / `next`.
 */
class Lexer {
  /**
   * @param source - the source to tokenize; stored as-is on `this.source`.
   */
  constructor(source) {
    // Both `token` and `lastToken` start out as the same start-of-file token.
    const sofToken = new _ast.Token(_tokenKind.TokenKind.SOF, 0, 0, 0, 0);

    this.source = source;
    this.lastToken = sofToken;
    this.token = sofToken;
    this.line = 1;
    this.lineStart = 0;
  }

  get [Symbol.toStringTag]() {
    return 'Lexer';
  }

  /**
   * Moves the lexer to the next non-comment token and returns it.
   * The previously current token becomes `lastToken`.
   */
  advance() {
    this.lastToken = this.token;
    const upcoming = this.lookahead();
    this.token = upcoming;
    return upcoming;
  }

  /**
   * Returns the next non-comment token after the current one without
   * changing `token` / `lastToken`. Once the current token is EOF it is
   * returned unchanged.
   */
  lookahead() {
    let cursor = this.token;
    if (cursor.kind === _tokenKind.TokenKind.EOF) {
      return cursor;
    }

    do {
      if (!cursor.next) {
        // Nothing read past this token yet: read one and link it in.
        const created = readNextToken(this, cursor.end);
        cursor.next = created;
        created.prev = cursor;
      }
      cursor = cursor.next;
    } while (cursor.kind === _tokenKind.TokenKind.COMMENT);

    return cursor;
  }
}
- exports.Lexer = Lexer;
/**
 * Reports whether `kind` is one of the punctuator token kinds
 * (`!`, `$`, `&`, `(`, `)`, `...`, `:`, `=`, `@`, `[`, `]`, `{`, `|`, `}`).
 *
 * @param kind - a token kind value to classify.
 * @returns {boolean} true only for the punctuator kinds listed below.
 */
function isPunctuatorTokenKind(kind) {
  const { TokenKind } = _tokenKind;
  switch (kind) {
    case TokenKind.BANG:
    case TokenKind.DOLLAR:
    case TokenKind.AMP:
    case TokenKind.PAREN_L:
    case TokenKind.PAREN_R:
    case TokenKind.SPREAD:
    case TokenKind.COLON:
    case TokenKind.EQUALS:
    case TokenKind.AT:
    case TokenKind.BRACKET_L:
    case TokenKind.BRACKET_R:
    case TokenKind.BRACE_L:
    case TokenKind.PIPE:
    case TokenKind.BRACE_R:
      return true;
    default:
      return false;
  }
}
/**
 * Reports whether `code` is a Unicode scalar value: any code point from
 * U+0000 to U+10FFFF except the surrogate range U+D800 to U+DFFF.
 *
 * @param {number} code - code point (or UTF-16 code unit) to test.
 * @returns {boolean}
 */
function isUnicodeScalarValue(code) {
  const isBelowSurrogates = code >= 0x0000 && code <= 0xd7ff;
  const isAboveSurrogates = code >= 0xe000 && code <= 0x10ffff;
  return isBelowSurrogates || isAboveSurrogates;
}
/**
 * Reports whether the two UTF-16 code units at `location` and
 * `location + 1` in `body` form a surrogate pair (a leading surrogate
 * immediately followed by a trailing surrogate).
 *
 * @param {string} body - text being scanned.
 * @param {number} location - index of the candidate leading surrogate.
 * @returns {boolean}
 */
function isSupplementaryCodePoint(body, location) {
  if (!isLeadingSurrogate(body.charCodeAt(location))) {
    return false;
  }
  return isTrailingSurrogate(body.charCodeAt(location + 1));
}
/**
 * Reports whether `code` lies in the leading (high) surrogate range
 * U+D800 to U+DBFF, inclusive.
 *
 * @param {number} code - UTF-16 code unit to test.
 * @returns {boolean}
 */
function isLeadingSurrogate(code) {
  const FIRST_LEADING = 0xd800;
  const LAST_LEADING = 0xdbff;
  return FIRST_LEADING <= code && code <= LAST_LEADING;
}
/**
 * Reports whether `code` lies in the trailing (low) surrogate range
 * U+DC00 to U+DFFF, inclusive.
 *
 * @param {number} code - UTF-16 code unit to test.
 * @returns {boolean}
 */
function isTrailingSurrogate(code) {
  const FIRST_TRAILING = 0xdc00;
  const LAST_TRAILING = 0xdfff;
  return FIRST_TRAILING <= code && code <= LAST_TRAILING;
}
/**
 * Produces a human-readable rendering of the code point at `location` in
 * the lexer's source body, for use in error messages.
 *
 * - Past the end of the body: the EOF token kind.
 * - Printable ASCII (U+0020 to U+007E): the character wrapped in double
 *   quotes, except `"` itself, which is wrapped in single quotes.
 * - Anything else: `U+` followed by the upper-case hex code point, padded
 *   to at least four digits.
 *
 * @param lexer - lexer whose `source.body` is inspected.
 * @param {number} location - index into the body.
 * @returns {string}
 */
function printCodePointAt(lexer, location) {
  const codePoint = lexer.source.body.codePointAt(location);
  if (codePoint === undefined) {
    return _tokenKind.TokenKind.EOF;
  }

  const isPrintableAscii = codePoint >= 0x0020 && codePoint <= 0x007e;
  if (!isPrintableAscii) {
    const hex = codePoint.toString(16).toUpperCase();
    return 'U+' + hex.padStart(4, '0');
  }

  const printable = String.fromCodePoint(codePoint);
  if (printable === '"') {
    return "'\"'";
  }
  return `"${printable}"`;
}
/**
 * Builds a Token of the given kind, filling in line and column from the
 * lexer's current line state.
 *
 * The column is 1-based and measured from `lexer.lineStart`.
 *
 * @param lexer - supplies `line` and `lineStart`.
 * @param kind - token kind.
 * @param {number} start - index of the token's first character.
 * @param {number} end - index just past the token's last character.
 * @param [value] - optional token value, passed through unchanged.
 */
function createToken(lexer, kind, start, end, value) {
  const { line, lineStart } = lexer;
  const column = 1 + start - lineStart;
  return new _ast.Token(kind, start, end, line, column, value);
}
/**
 * Scans the lexer's source body from `start` and returns the first token
 * found, skipping ignored characters along the way.
 *
 * Side effect: every line terminator skipped increments `lexer.line` and
 * moves `lexer.lineStart` to the first character after it.
 *
 * @param lexer - lexer whose `source.body` is scanned.
 * @param {number} start - index at which scanning begins.
 * @returns the next token, or an EOF token when the body is exhausted.
 * @throws a syntax error for a character that cannot begin any token.
 */
function readNextToken(lexer, start) {
  const { body } = lexer.source;
  const { TokenKind } = _tokenKind;
  const endOfBody = body.length;
  let cursor = start;

  while (cursor < endOfBody) {
    const code = body.charCodeAt(cursor);

    // BOM, tab, space and comma never produce a token.
    if (
      code === 0xfeff ||
      code === 0x0009 ||
      code === 0x0020 ||
      code === 0x002c
    ) {
      ++cursor;
      continue;
    }

    // "\n", "\r" and "\r\n" each count as a single line break.
    if (code === 0x000a || code === 0x000d) {
      const isCrLf =
        code === 0x000d && body.charCodeAt(cursor + 1) === 0x000a;
      cursor += isCrLf ? 2 : 1;
      ++lexer.line;
      lexer.lineStart = cursor;
      continue;
    }

    switch (code) {
      case 0x0023: // #
        return readComment(lexer, cursor);
      case 0x0021: // !
        return createToken(lexer, TokenKind.BANG, cursor, cursor + 1);
      case 0x0024: // $
        return createToken(lexer, TokenKind.DOLLAR, cursor, cursor + 1);
      case 0x0026: // &
        return createToken(lexer, TokenKind.AMP, cursor, cursor + 1);
      case 0x0028: // (
        return createToken(lexer, TokenKind.PAREN_L, cursor, cursor + 1);
      case 0x0029: // )
        return createToken(lexer, TokenKind.PAREN_R, cursor, cursor + 1);
      case 0x002e: // .
        // Only "..." is a token; a lone "." falls through to the error below.
        if (
          body.charCodeAt(cursor + 1) === 0x002e &&
          body.charCodeAt(cursor + 2) === 0x002e
        ) {
          return createToken(lexer, TokenKind.SPREAD, cursor, cursor + 3);
        }
        break;
      case 0x003a: // :
        return createToken(lexer, TokenKind.COLON, cursor, cursor + 1);
      case 0x003d: // =
        return createToken(lexer, TokenKind.EQUALS, cursor, cursor + 1);
      case 0x0040: // @
        return createToken(lexer, TokenKind.AT, cursor, cursor + 1);
      case 0x005b: // [
        return createToken(lexer, TokenKind.BRACKET_L, cursor, cursor + 1);
      case 0x005d: // ]
        return createToken(lexer, TokenKind.BRACKET_R, cursor, cursor + 1);
      case 0x007b: // {
        return createToken(lexer, TokenKind.BRACE_L, cursor, cursor + 1);
      case 0x007c: // |
        return createToken(lexer, TokenKind.PIPE, cursor, cursor + 1);
      case 0x007d: // }
        return createToken(lexer, TokenKind.BRACE_R, cursor, cursor + 1);
      case 0x0022: {
        // Three consecutive quotes open a block string; otherwise a string.
        const opensBlockString =
          body.charCodeAt(cursor + 1) === 0x0022 &&
          body.charCodeAt(cursor + 2) === 0x0022;
        return opensBlockString
          ? readBlockString(lexer, cursor)
          : readString(lexer, cursor);
      }
    }

    const { isDigit, isNameStart } = _characterClasses;
    // A digit or "-" starts a number.
    if (isDigit(code) || code === 0x002d) {
      return readNumber(lexer, cursor, code);
    }
    if (isNameStart(code)) {
      return readName(lexer, cursor);
    }

    // Nothing matched: pick the most helpful error message.
    let message;
    if (code === 0x0027) {
      message =
        'Unexpected single quote character (\'), did you mean to use a double quote (")?';
    } else if (
      isUnicodeScalarValue(code) ||
      isSupplementaryCodePoint(body, cursor)
    ) {
      message = `Unexpected character: ${printCodePointAt(lexer, cursor)}.`;
    } else {
      message = `Invalid character: ${printCodePointAt(lexer, cursor)}.`;
    }
    const { syntaxError } = _syntaxError;
    throw syntaxError(lexer.source, cursor, message);
  }

  return createToken(lexer, TokenKind.EOF, endOfBody, endOfBody);
}
/**
 * Reads a comment token. `start` is the index of the `#`; the comment runs
 * until a line terminator, the end of the body, or a code unit that is
 * neither a Unicode scalar value nor part of a surrogate pair.
 *
 * The token's value is the comment text without the leading `#`.
 *
 * @param lexer - lexer whose `source.body` is scanned.
 * @param {number} start - index of the `#` character.
 */
function readComment(lexer, start) {
  const { body } = lexer.source;
  const endOfBody = body.length;
  let cursor = start + 1;

  while (cursor < endOfBody) {
    const code = body.charCodeAt(cursor);
    const isLineTerminator = code === 0x000a || code === 0x000d;
    if (isLineTerminator) {
      break;
    }
    if (isUnicodeScalarValue(code)) {
      cursor += 1;
      continue;
    }
    if (!isSupplementaryCodePoint(body, cursor)) {
      // Lone surrogate: the comment stops here.
      break;
    }
    cursor += 2;
  }

  const text = body.slice(start + 1, cursor);
  return createToken(lexer, _tokenKind.TokenKind.COMMENT, start, cursor, text);
}
/**
 * Reads an INT or FLOAT token beginning at `start`.
 *
 * Accepted shape: optional `-`, then either a single `0` or a run of
 * digits, then an optional fraction (`.` + digits), then an optional
 * exponent (`e`/`E`, optional sign, digits). A fraction or exponent makes
 * the token a FLOAT; otherwise it is an INT.
 *
 * @param lexer - lexer whose `source.body` is scanned.
 * @param {number} start - index of the first character of the number.
 * @param {number} firstCode - code unit at `start`.
 * @throws a syntax error when a digit follows a leading `0`, when a
 *   required digit is missing, or when the number is directly followed by
 *   `.` or a name-start character.
 */
function readNumber(lexer, start, firstCode) {
  const { body } = lexer.source;
  const { isDigit, isNameStart } = _characterClasses;
  const { syntaxError } = _syntaxError;
  let cursor = start;
  let current = firstCode;
  let sawFractionOrExponent = false;

  // Optional leading minus.
  if (current === 0x002d) {
    cursor += 1;
    current = body.charCodeAt(cursor);
  }

  // Integer part: a lone "0", or one or more digits.
  if (current !== 0x0030) {
    cursor = readDigits(lexer, cursor, current);
    current = body.charCodeAt(cursor);
  } else {
    cursor += 1;
    current = body.charCodeAt(cursor);
    if (isDigit(current)) {
      throw syntaxError(
        lexer.source,
        cursor,
        `Invalid number, unexpected digit after 0: ${printCodePointAt(
          lexer,
          cursor,
        )}.`,
      );
    }
  }

  // Fraction part.
  if (current === 0x002e) {
    sawFractionOrExponent = true;
    cursor += 1;
    current = body.charCodeAt(cursor);
    cursor = readDigits(lexer, cursor, current);
    current = body.charCodeAt(cursor);
  }

  // Exponent part, "E" or "e" with an optional sign.
  if (current === 0x0045 || current === 0x0065) {
    sawFractionOrExponent = true;
    cursor += 1;
    current = body.charCodeAt(cursor);
    if (current === 0x002b || current === 0x002d) {
      cursor += 1;
      current = body.charCodeAt(cursor);
    }
    cursor = readDigits(lexer, cursor, current);
    current = body.charCodeAt(cursor);
  }

  // A number may not run straight into "." or the start of a name.
  if (current === 0x002e || isNameStart(current)) {
    throw syntaxError(
      lexer.source,
      cursor,
      `Invalid number, expected digit but got: ${printCodePointAt(
        lexer,
        cursor,
      )}.`,
    );
  }

  const kind = sawFractionOrExponent
    ? _tokenKind.TokenKind.FLOAT
    : _tokenKind.TokenKind.INT;
  return createToken(lexer, kind, start, cursor, body.slice(start, cursor));
}
/**
 * Consumes a run of one or more digits beginning at `start`.
 *
 * @param lexer - lexer whose `source.body` is scanned.
 * @param {number} start - index of the first expected digit.
 * @param {number} firstCode - code unit at `start`.
 * @returns {number} index of the first character after the digit run.
 * @throws a syntax error at `start` when `firstCode` is not a digit.
 */
function readDigits(lexer, start, firstCode) {
  const { isDigit } = _characterClasses;

  if (!isDigit(firstCode)) {
    const { syntaxError } = _syntaxError;
    throw syntaxError(
      lexer.source,
      start,
      `Invalid number, expected digit but got: ${printCodePointAt(
        lexer,
        start,
      )}.`,
    );
  }

  const { body } = lexer.source;
  // The first digit is already verified; scan past the rest.
  let cursor = start + 1;
  for (; isDigit(body.charCodeAt(cursor)); ++cursor);
  return cursor;
}
/**
 * Reads a single-line STRING token. `start` is the index of the opening
 * double quote.
 *
 * The token's value is the string content with escape sequences replaced
 * by the characters they denote.
 *
 * @param lexer - lexer whose `source.body` is scanned.
 * @param {number} start - index of the opening `"`.
 * @throws a syntax error for a lone surrogate inside the string, or
 *   ("Unterminated string.") when a line terminator or the end of the body
 *   is reached before the closing quote.
 */
function readString(lexer, start) {
  const { body } = lexer.source;
  const endOfBody = body.length;
  const { syntaxError } = _syntaxError;
  let cursor = start + 1;
  // Start of the run of literal characters not yet copied into `decoded`.
  let literalFrom = cursor;
  let decoded = '';

  while (cursor < endOfBody) {
    const code = body.charCodeAt(cursor);

    // Closing quote: flush the pending literal run and finish.
    if (code === 0x0022) {
      decoded += body.slice(literalFrom, cursor);
      return createToken(
        lexer,
        _tokenKind.TokenKind.STRING,
        start,
        cursor + 1,
        decoded,
      );
    }

    // Backslash: decode the escape sequence and resume after it.
    if (code === 0x005c) {
      decoded += body.slice(literalFrom, cursor);
      let escape;
      if (body.charCodeAt(cursor + 1) !== 0x0075) {
        escape = readEscapedCharacter(lexer, cursor);
      } else if (body.charCodeAt(cursor + 2) === 0x007b) {
        escape = readEscapedUnicodeVariableWidth(lexer, cursor);
      } else {
        escape = readEscapedUnicodeFixedWidth(lexer, cursor);
      }
      decoded += escape.value;
      cursor += escape.size;
      literalFrom = cursor;
      continue;
    }

    // A line terminator ends the scan; reported as unterminated below.
    if (code === 0x000a || code === 0x000d) {
      break;
    }

    if (isUnicodeScalarValue(code)) {
      cursor += 1;
    } else if (isSupplementaryCodePoint(body, cursor)) {
      cursor += 2;
    } else {
      throw syntaxError(
        lexer.source,
        cursor,
        `Invalid character within String: ${printCodePointAt(
          lexer,
          cursor,
        )}.`,
      );
    }
  }

  throw syntaxError(lexer.source, cursor, 'Unterminated string.');
}
/**
 * Decodes a variable-width Unicode escape of the form `\u{X...}` starting
 * at `position` (the index of the backslash).
 *
 * @param lexer - lexer whose `source.body` is scanned.
 * @param {number} position - index of the backslash.
 * @returns {{value: string, size: number}} the decoded character and the
 *   number of source characters the escape occupies.
 * @throws a syntax error when the braces are empty, a non-hex character
 *   appears, the value is not a Unicode scalar value, or no closing brace
 *   is found within the first 12 characters of the escape.
 */
function readEscapedUnicodeVariableWidth(lexer, position) {
  const { body } = lexer.source;
  let codePoint = 0;
  // Characters consumed so far; 3 accounts for the `\u{` prefix.
  let size = 3;

  while (size < 12) {
    const code = body.charCodeAt(position + size);
    size += 1;

    if (code === 0x007d) {
      // Closing brace: valid only with at least one digit and a scalar value.
      const isWellFormed = size >= 5 && isUnicodeScalarValue(codePoint);
      if (isWellFormed) {
        return { value: String.fromCodePoint(codePoint), size };
      }
      break;
    }

    // A non-hex digit contributes -1, which turns the accumulator negative.
    codePoint = (codePoint << 4) | readHexDigit(code);
    if (codePoint < 0) {
      break;
    }
  }

  const { syntaxError } = _syntaxError;
  throw syntaxError(
    lexer.source,
    position,
    `Invalid Unicode escape sequence: "${body.slice(
      position,
      position + size,
    )}".`,
  );
}
/**
 * Decodes a fixed-width Unicode escape of the form `\uXXXX` starting at
 * `position` (the index of the backslash).
 *
 * A leading-surrogate escape immediately followed by a trailing-surrogate
 * escape (`\uXXXX\uXXXX`) is decoded as one surrogate pair.
 *
 * @param lexer - lexer whose `source.body` is scanned.
 * @param {number} position - index of the backslash.
 * @returns {{value: string, size: number}} the decoded text and the number
 *   of source characters consumed (6, or 12 for a surrogate pair).
 * @throws a syntax error when the escape is neither a scalar value nor the
 *   first half of a valid surrogate pair.
 */
function readEscapedUnicodeFixedWidth(lexer, position) {
  const { body } = lexer.source;
  const firstUnit = read16BitHexCode(body, position + 2);

  if (isUnicodeScalarValue(firstUnit)) {
    return { value: String.fromCodePoint(firstUnit), size: 6 };
  }

  // A leading surrogate is only valid when a second `\u` escape follows it.
  const hasFollowingEscape =
    isLeadingSurrogate(firstUnit) &&
    body.charCodeAt(position + 6) === 0x005c &&
    body.charCodeAt(position + 7) === 0x0075;

  if (hasFollowingEscape) {
    const secondUnit = read16BitHexCode(body, position + 8);
    if (isTrailingSurrogate(secondUnit)) {
      // Emit both halves so together they form a single code point.
      return { value: String.fromCodePoint(firstUnit, secondUnit), size: 12 };
    }
  }

  const { syntaxError } = _syntaxError;
  throw syntaxError(
    lexer.source,
    position,
    `Invalid Unicode escape sequence: "${body.slice(position, position + 6)}".`,
  );
}
/**
 * Reads four hexadecimal digits starting at `position` and combines them
 * into a single 16-bit value, most significant digit first.
 *
 * If any of the four characters is not a hex digit the result is negative,
 * because `readHexDigit` yields -1 for it and the bitwise OR keeps the sign
 * bit set.
 *
 * @param {string} body - text being scanned.
 * @param {number} position - index of the first hex digit.
 * @returns {number} the decoded value, or a negative number on bad input.
 */
function read16BitHexCode(body, position) {
  const digit3 = readHexDigit(body.charCodeAt(position));
  const digit2 = readHexDigit(body.charCodeAt(position + 1));
  const digit1 = readHexDigit(body.charCodeAt(position + 2));
  const digit0 = readHexDigit(body.charCodeAt(position + 3));
  return (digit3 << 12) | (digit2 << 8) | (digit1 << 4) | digit0;
}
/**
 * Converts the code unit of a hexadecimal digit to its numeric value.
 *
 * @param {number} code - code unit to convert.
 * @returns {number} 0-15 for `0-9`, `A-F` or `a-f`; -1 for anything else.
 */
function readHexDigit(code) {
  // "0" through "9"
  if (code >= 0x0030 && code <= 0x0039) {
    return code - 0x0030;
  }
  // "A" through "F"
  if (code >= 0x0041 && code <= 0x0046) {
    return code - 0x0037;
  }
  // "a" through "f"
  if (code >= 0x0061 && code <= 0x0066) {
    return code - 0x0057;
  }
  return -1;
}
/**
 * Decodes a two-character escape sequence (`\"`, `\\`, `\/`, `\b`, `\f`,
 * `\n`, `\r`, `\t`) starting at `position` (the index of the backslash).
 *
 * @param lexer - lexer whose `source.body` is scanned.
 * @param {number} position - index of the backslash.
 * @returns {{value: string, size: number}} the decoded character; `size`
 *   is always 2.
 * @throws a syntax error when the character after the backslash is not one
 *   of the recognised escapes.
 */
function readEscapedCharacter(lexer, position) {
  const { body } = lexer.source;
  let unescaped;

  switch (body.charCodeAt(position + 1)) {
    case 0x0022: // "
      unescaped = '\u0022';
      break;
    case 0x005c: // \
      unescaped = '\u005c';
      break;
    case 0x002f: // /
      unescaped = '\u002f';
      break;
    case 0x0062: // b -> backspace
      unescaped = '\u0008';
      break;
    case 0x0066: // f -> form feed
      unescaped = '\u000c';
      break;
    case 0x006e: // n -> line feed
      unescaped = '\u000a';
      break;
    case 0x0072: // r -> carriage return
      unescaped = '\u000d';
      break;
    case 0x0074: // t -> tab
      unescaped = '\u0009';
      break;
    default: {
      const { syntaxError } = _syntaxError;
      throw syntaxError(
        lexer.source,
        position,
        `Invalid character escape sequence: "${body.slice(
          position,
          position + 2,
        )}".`,
      );
    }
  }

  return { value: unescaped, size: 2 };
}
/**
 * Reads a BLOCK_STRING token. `start` is the index of the first of the
 * three opening double quotes.
 *
 * The raw content is split into lines on "\n", "\r" and "\r\n", `\"""` is
 * replaced by `"""`, and the lines are passed through
 * `dedentBlockStringLines` before being joined with "\n" to form the
 * token's value.
 *
 * Side effect: after the token is created, `lexer.line` and
 * `lexer.lineStart` are advanced past the line breaks inside the string.
 *
 * @param lexer - lexer whose `source.body` is scanned.
 * @param {number} start - index of the opening `"""`.
 * @throws a syntax error for a lone surrogate inside the string, or
 *   ("Unterminated string.") when the body ends before the closing `"""`.
 */
function readBlockString(lexer, start) {
  const { body } = lexer.source;
  const endOfBody = body.length;
  const { syntaxError } = _syntaxError;
  const rawLines = [];
  // Committed to the lexer only once the closing quotes are found.
  let pendingLineStart = lexer.lineStart;
  let cursor = start + 3;
  // Start of the run of characters not yet copied into `lineText`.
  let literalFrom = cursor;
  let lineText = '';

  const isTripleQuoteAt = (index) =>
    body.charCodeAt(index) === 0x0022 &&
    body.charCodeAt(index + 1) === 0x0022 &&
    body.charCodeAt(index + 2) === 0x0022;

  while (cursor < endOfBody) {
    const code = body.charCodeAt(cursor);

    // Closing """.
    if (isTripleQuoteAt(cursor)) {
      lineText += body.slice(literalFrom, cursor);
      rawLines.push(lineText);
      const { dedentBlockStringLines } = _blockString;
      // Create the token first so it records the line it starts on.
      const token = createToken(
        lexer,
        _tokenKind.TokenKind.BLOCK_STRING,
        start,
        cursor + 3,
        dedentBlockStringLines(rawLines).join('\n'),
      );
      lexer.line += rawLines.length - 1;
      lexer.lineStart = pendingLineStart;
      return token;
    }

    // Escaped \""": drop the backslash, keep the three quotes as content.
    if (code === 0x005c && isTripleQuoteAt(cursor + 1)) {
      lineText += body.slice(literalFrom, cursor);
      literalFrom = cursor + 1;
      cursor += 4;
      continue;
    }

    // Line terminator: finish the current line and start a new one.
    if (code === 0x000a || code === 0x000d) {
      lineText += body.slice(literalFrom, cursor);
      rawLines.push(lineText);
      const isCrLf =
        code === 0x000d && body.charCodeAt(cursor + 1) === 0x000a;
      cursor += isCrLf ? 2 : 1;
      lineText = '';
      literalFrom = cursor;
      pendingLineStart = cursor;
      continue;
    }

    if (isUnicodeScalarValue(code)) {
      cursor += 1;
    } else if (isSupplementaryCodePoint(body, cursor)) {
      cursor += 2;
    } else {
      throw syntaxError(
        lexer.source,
        cursor,
        `Invalid character within String: ${printCodePointAt(
          lexer,
          cursor,
        )}.`,
      );
    }
  }

  throw syntaxError(lexer.source, cursor, 'Unterminated string.');
}
/**
 * Reads a NAME token beginning at `start`. The character at `start` is
 * taken as already accepted; the name extends over every following
 * character for which `isNameContinue` is true.
 *
 * @param lexer - lexer whose `source.body` is scanned.
 * @param {number} start - index of the first character of the name.
 */
function readName(lexer, start) {
  const { body } = lexer.source;
  const endOfBody = body.length;
  const { isNameContinue } = _characterClasses;

  let cursor = start + 1;
  while (cursor < endOfBody && isNameContinue(body.charCodeAt(cursor))) {
    cursor += 1;
  }

  const name = body.slice(start, cursor);
  return createToken(lexer, _tokenKind.TokenKind.NAME, start, cursor, name);
}
|