123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514 |
/*************************************************************
 *
 *  Copyright (c) 2017-2022 The MathJax Consortium
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
/**
 * @fileoverview The TexParser. Implements the basic parsing functionality and
 *     administers the global stack and tree objects.
 *
 * @author v.sorge@mathjax.org (Volker Sorge)
 */
- import ParseUtil from './ParseUtil.js';
- import {HandlerType} from './MapHandler.js';
- import Stack from './Stack.js';
- import StackItemFactory from './StackItemFactory.js';
- import {Tags} from './Tags.js';
- import TexError from './TexError.js';
- import {MmlNode, AbstractMmlNode} from '../../core/MmlTree/MmlNode.js';
- import {ParseInput, ParseResult} from './Types.js';
- import ParseOptions from './ParseOptions.js';
- import {StackItem, EnvList} from './StackItem.js';
- import {Symbol} from './Symbol.js';
- import {OptionList} from '../../util/Options.js';
/**
 * The main Tex Parser class.
 *
 * A parser instance owns the string being parsed, a read position into that
 * string (`i`), and a stack onto which items and nodes are pushed. The
 * constructor parses the whole string immediately; the `Get*` methods are the
 * low-level scanning routines that read from the current position.
 */
export default class TexParser {

  /**
   * Counter for recursive macros.
   * @type {number}
   */
  public macroCount: number = 0;

  /**
   * The stack for items and created nodes.
   * @type {Stack}
   */
  public stack: Stack;

  /**
   * Current position in the string that is parsed.
   * @type {number}
   */
  public i: number = 0;

  /**
   * The last command sequence
   * @type {string}
   */
  public currentCS: string = '';

  /**
   * Registers the parser with the configuration, creates the stack, parses
   * the complete string and finally pushes a 'stop' item.
   *
   * @constructor
   * @param {string} string The string to parse.
   * @param {EnvList} env The initial environment representing the current parse
   *     state of the overall expression translation. An own `isInner` property
   *     is read as the stack's inner flag and is REMOVED from the object passed
   *     in; all remaining own enumerable properties are copied into a fresh
   *     environment. A missing (null/undefined) environment is tolerated: the
   *     stack then receives an undefined environment.
   * @param {ParseOptions} configuration A parser configuration.
   */
  constructor(private _string: string, env: EnvList, public configuration: ParseOptions) {
    // The inner flag defaults to true unless `env` explicitly carries one.
    let isInner = true;
    let ENV: EnvList;
    if (env) {
      // All access to `env` happens behind this guard, so a null environment
      // no longer raises a TypeError before the guard is reached.
      if (Object.prototype.hasOwnProperty.call(env, 'isInner')) {
        isInner = env['isInner'] as boolean;
      }
      // Deleting from the caller's object is existing behaviour that is kept.
      delete env['isInner'];
      // Shallow copy, so the stack's environment is not the caller's object.
      ENV = Object.assign({}, env);
    }
    this.configuration.pushParser(this);
    this.stack = new Stack(this.itemFactory, ENV, isInner);
    this.Parse();
    this.Push(this.itemFactory.create('stop'));
  }

  /**
   * @return {OptionList} The configuration options.
   */
  get options(): OptionList {
    return this.configuration.options;
  }

  /**
   * @return {StackItemFactory} The factory for stack items.
   */
  get itemFactory(): StackItemFactory {
    return this.configuration.itemFactory;
  }

  /**
   * @return {Tags} The tags style of this configuration.
   */
  get tags(): Tags {
    return this.configuration.tags;
  }

  /**
   * Sets the string that should be parsed. The position `i` is not reset.
   * @param {string} str The new string to parse.
   */
  set string(str: string) {
    this._string = str;
  }

  /**
   * @return {string} The string that is currently parsed.
   */
  get string(): string {
    return this._string;
  }

  /**
   * Parses the input with the specified kind of map.
   * @param {HandlerType} kind Configuration name.
   * @param {ParseInput} input Input to be parsed.
   * @return {ParseResult} The output of the parsing function.
   */
  public parse(kind: HandlerType, input: ParseInput): ParseResult {
    return this.configuration.handlers.get(kind).parse(input);
  }

  /**
   * Maps a symbol to its "parse value" if it exists.
   * @param {HandlerType} kind Configuration name.
   * @param {string} symbol The symbol to parse.
   * @return {any} A boolean, Character, or Macro.
   */
  public lookup(kind: HandlerType, symbol: string): any {
    return this.configuration.handlers.get(kind).lookup(symbol);
  }

  /**
   * Checks if a symbol is contained in one of the symbol mappings of the
   * specified kind.
   * @param {HandlerType} kind Configuration name.
   * @param {string} symbol The symbol to parse.
   * @return {boolean} True if the symbol is contained in the given types of
   *     symbol mapping.
   */
  public contains(kind: HandlerType, symbol: string): boolean {
    return this.configuration.handlers.get(kind).contains(symbol);
  }

  /**
   * Lists every handler of the configuration as one `name: handler` line.
   * @override
   */
  public toString(): string {
    let str = '';
    for (const config of Array.from(this.configuration.handlers.keys())) {
      str += config + ': ' +
        this.configuration.handlers.get(config as HandlerType) + '\n';
    }
    return str;
  }

  /**
   * Parses the current input string: reads one code point at a time and hands
   * it to the 'character' handlers, until the position reaches the end of the
   * string. Handlers receive the parser itself and may advance `i` further.
   */
  public Parse() {
    while (this.i < this.string.length) {
      const c = this.getCodePoint();
      this.i += c.length;
      this.parse('character', [this, c]);
    }
  }

  /**
   * Pushes a new item onto the stack. The item can also be a Mml node,
   * but if the mml item is an inferred row, push its children instead.
   * @param {StackItem|MmlNode} arg The new item.
   */
  public Push(arg: StackItem | MmlNode) {
    if (arg instanceof AbstractMmlNode && arg.isInferred) {
      this.PushAll(arg.childNodes);
    } else {
      this.stack.Push(arg);
    }
  }

  /**
   * Pushes a list of new items onto the stack, one by one and in order.
   * Unlike Push, the items go to the stack directly: inferred rows among
   * them are not expanded.
   * @param {(StackItem|MmlNode)[]} args The new items.
   */
  public PushAll(args: (StackItem | MmlNode)[]) {
    for (const arg of args) {
      this.stack.Push(arg);
    }
  }

  /**
   * Retrieves the result of the parse and pops this parser from the
   * configuration.
   * @return {MmlNode} The internal Mathml structure, or null if the top of the
   *     stack is not an 'mml' item (in which case the parser is not popped).
   */
  public mml(): MmlNode {
    if (!this.stack.Top().isKind('mml')) {
      return null;
    }
    const node = this.stack.Top().First;
    this.configuration.popParser();
    return node;
  }

  /************************************************************************
   *
   *   String handling routines
   */

  /**
   * Convert delimiter to character.
   * @param {string} c The delimiter name.
   * @return {string} The corresponding character, or null if the delimiter
   *     lookup yields nothing.
   */
  public convertDelimiter(c: string): string {
    const symbol = this.lookup('delimiter', c) as Symbol;
    return symbol ? symbol.char : null;
  }

  /**
   * Reads the code point at the current position without advancing.
   * @return {string} The next unicode character in the string (two code units
   *     for a surrogate pair), or the empty string at the end of the string.
   */
  public getCodePoint(): string {
    const code = this.string.codePointAt(this.i);
    return code === undefined ? '' : String.fromCodePoint(code);
  }

  /**
   * @return {boolean} True if the next character to parse is a space.
   */
  public nextIsSpace(): boolean {
    return /\s/.test(this.string.charAt(this.i));
  }

  /**
   * Skips whitespace and peeks at what follows; the position is left on the
   * returned character, not behind it.
   * @return {string} The next non-space character ('' at the end of the string).
   */
  public GetNext(): string {
    while (this.nextIsSpace()) {
      this.i++;
    }
    return this.getCodePoint();
  }

  /**
   * Reads a control-sequence name at the current position (the backslash is
   * expected to have been consumed already). A name is either a run of ASCII
   * letters, after which one optional space is swallowed, or a single
   * character (a surrogate pair counts as one character).
   * @return {string} The control-sequence name. If nothing can be matched
   *     (end of the string, or a character that `.` does not match) the
   *     position is advanced by one and a single space is returned.
   */
  public GetCS(): string {
    const CS = this.string.slice(this.i).match(/^(([a-z]+) ?|[\uD800-\uDBFF].|.)/i);
    if (CS) {
      this.i += CS[0].length;
      // CS[2] is the letter run without the swallowed space; CS[1] is used
      // for the single-character alternatives.
      return CS[2] || CS[1];
    } else {
      this.i++;
      return ' ';
    }
  }

  /**
   * Get and return a TeX argument (either a single character or control
   * sequence, or the contents of the next set of braces).
   * @param {string} _name Name of the current control sequence (unused; error
   *     messages report `currentCS`).
   * @param {boolean} noneOK? True if no argument is OK.
   * @return {string} The next argument, or null if there is none and `noneOK`
   *     is set. The contents of a brace group are returned without the
   *     enclosing braces.
   * @throws {TexError} MissingArgFor, ExtraCloseMissingOpen or MissingCloseBrace.
   */
  public GetArgument(_name: string, noneOK?: boolean): string {
    switch (this.GetNext()) {
      case '':
        if (!noneOK) {
          // @test MissingArgFor
          throw new TexError('MissingArgFor', 'Missing argument for %1', this.currentCS);
        }
        return null;
      case '}':
        if (!noneOK) {
          // @test ExtraCloseMissingOpen
          throw new TexError('ExtraCloseMissingOpen',
                             'Extra close brace or missing open brace');
        }
        return null;
      case '\\':
        this.i++;
        return '\\' + this.GetCS();
      case '{': {
        // Position of the first character after the opening brace.
        const j = ++this.i;
        let parens = 1;
        while (this.i < this.string.length) {
          switch (this.string.charAt(this.i++)) {
            case '\\':
              // Skip the escaped character so that \{ and \} are not counted.
              this.i++;
              break;
            case '{':
              parens++;
              break;
            case '}':
              if (--parens === 0) {
                return this.string.slice(j, this.i - 1);
              }
              break;
          }
        }
        // @test MissingCloseBrace
        throw new TexError('MissingCloseBrace', 'Missing close brace');
      }
    }
    // Any other character is an argument by itself.
    const c = this.getCodePoint();
    this.i += c.length;
    return c;
  }

  /**
   * Get an optional LaTeX argument in brackets.
   * @param {string} _name Name of the current control sequence (unused; error
   *     messages report `currentCS`).
   * @param {string} def? The default value for the optional argument.
   * @return {string} The optional argument without its brackets, or `def` if
   *     the next non-space character is not '['.
   * @throws {TexError} ExtraCloseLooking or MissingCloseBracket.
   */
  public GetBrackets(_name: string, def?: string): string {
    if (this.GetNext() !== '[') {
      return def;
    }
    const j = ++this.i;
    let parens = 0;
    while (this.i < this.string.length) {
      switch (this.string.charAt(this.i++)) {
        case '{':
          parens++;
          break;
        case '\\':
          // Skip the escaped character.
          this.i++;
          break;
        case '}':
          if (parens-- <= 0) {
            // @test ExtraCloseLooking1
            throw new TexError('ExtraCloseLooking',
                               'Extra close brace while looking for %1', '\']\'');
          }
          break;
        case ']':
          // A ']' inside a brace group does not close the optional argument.
          if (parens === 0) {
            return this.string.slice(j, this.i - 1);
          }
          break;
      }
    }
    // @test MissingCloseBracket
    throw new TexError('MissingCloseBracket',
                       'Could not find closing \']\' for argument to %1', this.currentCS);
  }

  /**
   * Get the name of a delimiter (check it in the delimiter list).
   * @param {string} name Name of the current control sequence.
   * @param {boolean} braceOK? Are braces around the delimiter OK.
   * @return {string} The character of the delimiter, as given by
   *     convertDelimiter.
   * @throws {TexError} MissingOrUnrecognizedDelim.
   */
  public GetDelimiter(name: string, braceOK?: boolean): string {
    let c = this.GetNext();
    this.i += c.length;
    if (this.i <= this.string.length) {
      if (c === '\\') {
        c += this.GetCS();
      } else if (c === '{' && braceOK) {
        // Step back onto the brace so that GetArgument reads the whole group.
        this.i--;
        c = this.GetArgument(name).trim();
      }
      if (this.contains('delimiter', c)) {
        return this.convertDelimiter(c);
      }
    }
    // @test MissingOrUnrecognizedDelim1, MissingOrUnrecognizedDelim2
    throw new TexError('MissingOrUnrecognizedDelim',
                       'Missing or unrecognized delimiter for %1', this.currentCS);
  }

  /**
   * Get a dimension (including its units). The dimension is either given as a
   * braced argument or read directly from the rest of the string.
   * @param {string} name Name of the current control sequence.
   * @return {string} The dimension string (value followed by unit).
   * @throws {TexError} MissingDimOrUnits.
   */
  public GetDimen(name: string): string {
    if (this.GetNext() === '{') {
      const dimen = this.GetArgument(name);
      const [value, unit] = ParseUtil.matchDimen(dimen);
      if (value) {
        // @test Raise In Line, Lower 2, (Raise|Lower) Negative
        return value + unit;
      }
    } else {
      // @test Above, Raise, Lower, Modulo, Above With Delims
      const dimen = this.string.slice(this.i);
      const [value, unit, length] = ParseUtil.matchDimen(dimen, true);
      if (value) {
        // Consume only the part of the remaining string that was matched.
        this.i += length;
        return value + unit;
      }
    }
    // @test MissingDimOrUnits
    throw new TexError('MissingDimOrUnits',
                       'Missing dimension or its units for %1', this.currentCS);
  }

  /**
   * Get everything up to the given control sequence (token). The token is
   * only recognised outside of brace groups, and it is consumed.
   * @param {string} _name Name of the current control sequence (unused; error
   *     messages report `currentCS`).
   * @param {string} token The element until where to parse.
   * @return {string} The text between the current position and the given
   *     token, without leading whitespace and without the whitespace directly
   *     in front of the token.
   * @throws {TexError} ExtraCloseLooking or TokenNotFoundForCommand.
   */
  public GetUpTo(_name: string, token: string): string {
    while (this.nextIsSpace()) {
      this.i++;
    }
    const j = this.i;
    let parens = 0;
    while (this.i < this.string.length) {
      // Remember where this element starts (before any whitespace is skipped).
      const k = this.i;
      let c = this.GetNext();
      this.i += c.length;
      switch (c) {
        case '\\':
          c += this.GetCS();
          break;
        case '{':
          parens++;
          break;
        case '}':
          if (parens === 0) {
            // @test ExtraCloseLooking2
            throw new TexError('ExtraCloseLooking',
                               'Extra close brace while looking for %1', token);
          }
          parens--;
          break;
      }
      if (parens === 0 && c === token) {
        return this.string.slice(j, k);
      }
    }
    // @test TokenNotFoundForCommand
    throw new TexError('TokenNotFoundForCommand',
                       'Could not find %1 for %2', token, this.currentCS);
  }

  /**
   * Parse the arguments of a control sequence in a new parser instance, which
   * is given this parser's stack environment and configuration.
   * @param {string} name Name of the current control sequence.
   * @return {MmlNode} The parsed node.
   */
  public ParseArg(name: string): MmlNode {
    return new TexParser(this.GetArgument(name), this.stack.env,
                         this.configuration).mml();
  }

  /**
   * Parses a given string up to a given token in a new parser instance, which
   * is given this parser's stack environment and configuration.
   * @param {string} name Name of the current control sequence.
   * @param {string} token A Token at which to end parsing.
   * @return {MmlNode} The parsed node.
   */
  public ParseUpTo(name: string, token: string): MmlNode {
    return new TexParser(this.GetUpTo(name, token), this.stack.env,
                         this.configuration).mml();
  }

  /**
   * Get a delimiter or empty argument
   * @param {string} name Name of the current control sequence.
   * @return {string} The delimiter name as written in the input (it is not
   *     converted to its character), or null for an empty argument.
   * @throws {TexError} MissingOrUnrecognizedDelim.
   */
  public GetDelimiterArg(name: string): string {
    const c = ParseUtil.trimSpaces(this.GetArgument(name));
    if (c === '') {
      return null;
    }
    if (this.contains('delimiter', c)) {
      return c;
    }
    // @test MissingOrUnrecognizedDelim
    throw new TexError('MissingOrUnrecognizedDelim',
                       'Missing or unrecognized delimiter for %1', this.currentCS);
  }

  /**
   * Consumes a star if it is the next non-space character.
   * @return {boolean} True if a star follows the control sequence name.
   */
  public GetStar(): boolean {
    const star = (this.GetNext() === '*');
    if (star) {
      this.i++;
    }
    return star;
  }

  /**
   * Convenience method to create nodes with the node factory of the current
   * configuration.
   * @param {string} kind The kind of node to create.
   * @param {any[]} ...rest The remaining arguments for the creation method.
   * @return {MmlNode} The newly created node.
   */
  public create(kind: string, ...rest: any[]): MmlNode {
    return this.configuration.nodeFactory.create(kind, ...rest);
  }

}
|