TexParser.ts 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. /*************************************************************
  2. *
  3. * Copyright (c) 2017-2022 The MathJax Consortium
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /**
  18. * @fileoverview The TexParser. Implements the basic parsing functionality and
  19. * administers the global stack and tree objects.
  20. *
  21. * @author v.sorge@mathjax.org (Volker Sorge)
  22. */
  23. import ParseUtil from './ParseUtil.js';
  24. import {HandlerType} from './MapHandler.js';
  25. import Stack from './Stack.js';
  26. import StackItemFactory from './StackItemFactory.js';
  27. import {Tags} from './Tags.js';
  28. import TexError from './TexError.js';
  29. import {MmlNode, AbstractMmlNode} from '../../core/MmlTree/MmlNode.js';
  30. import {ParseInput, ParseResult} from './Types.js';
  31. import ParseOptions from './ParseOptions.js';
  32. import {StackItem, EnvList} from './StackItem.js';
  33. import {Symbol} from './Symbol.js';
  34. import {OptionList} from '../../util/Options.js';
  /**
   * The main TeX parser class.
   *
   * Constructing an instance registers the parser with the given
   * configuration, creates the item stack, parses the complete input string
   * character by character and finally pushes a 'stop' item.  The remaining
   * methods are the string-scanning helpers used while parsing.
   */
  export default class TexParser {

    /**
     * Counter for recursive macros.
     * @type {number}
     */
    public macroCount: number = 0;

    /**
     * The stack for items and created nodes.
     * @type {Stack}
     */
    public stack: Stack;

    /**
     * Current position in the string that is parsed.
     * @type {number}
     */
    public i: number = 0;

    /**
     * The last command sequence.
     * @type {string}
     */
    public currentCS: string = '';

    /**
     * @constructor
     * @param {string} string The string to parse.
     * @param {EnvList} env The initial environment representing the current
     *     parse state of the overall expression translation.  An own `isInner`
     *     entry is read and then removed from the object that is passed in; all
     *     other entries are copied into a fresh environment for the stack.  A
     *     missing (null/undefined) environment is accepted.
     * @param {ParseOptions} configuration A parser configuration.
     */
    constructor(private _string: string, env: EnvList, public configuration: ParseOptions) {
      // Values used when no environment is given or it has no own `isInner`.
      let inner = false;
      let isInner = true;
      let ENV: EnvList;
      if (env) {
        // All accesses to env happen inside this guard so that a missing
        // environment no longer throws before the guard is reached.
        inner = Object.prototype.hasOwnProperty.call(env, 'isInner');
        isInner = env['isInner'] as boolean;
        // Removed before copying so that `isInner` never ends up in ENV.
        delete env['isInner'];
        ENV = {};
        for (const id of Object.keys(env)) {
          ENV[id] = env[id];
        }
      }
      this.configuration.pushParser(this);
      this.stack = new Stack(this.itemFactory, ENV, inner ? isInner : true);
      this.Parse();
      this.Push(this.itemFactory.create('stop'));
    }

    /**
     * @return {OptionList} The configuration options.
     */
    get options(): OptionList {
      return this.configuration.options;
    }

    /**
     * @return {StackItemFactory} The factory for stack items.
     */
    get itemFactory(): StackItemFactory {
      return this.configuration.itemFactory;
    }

    /**
     * @return {Tags} The tags style of this configuration.
     */
    get tags(): Tags {
      return this.configuration.tags;
    }

    /**
     * Sets the string that should be parsed.
     * @param {string} str The new string to parse.
     */
    set string(str: string) {
      this._string = str;
    }

    /**
     * @return {string} The string that is currently parsed.
     */
    get string(): string {
      return this._string;
    }

    /**
     * Parses the input with the specified kind of map.
     * @param {HandlerType} kind Configuration name.
     * @param {ParseInput} input Input to be parsed.
     * @return {ParseResult} The output of the parsing function.
     */
    public parse(kind: HandlerType, input: ParseInput): ParseResult {
      return this.configuration.handlers.get(kind).parse(input);
    }

    /**
     * Maps a symbol to its "parse value" if it exists.
     * @param {HandlerType} kind Configuration name.
     * @param {string} symbol The symbol to parse.
     * @return {any} A boolean, Character, or Macro.
     */
    public lookup(kind: HandlerType, symbol: string): any {
      return this.configuration.handlers.get(kind).lookup(symbol);
    }

    /**
     * Checks if a symbol is contained in one of the symbol mappings of the
     * specified kind.
     * @param {HandlerType} kind Configuration name.
     * @param {string} symbol The symbol to parse.
     * @return {boolean} True if the symbol is contained in the given types of
     *     symbol mapping.
     */
    public contains(kind: HandlerType, symbol: string): boolean {
      return this.configuration.handlers.get(kind).contains(symbol);
    }

    /**
     * Lists every handler kind of the configuration together with the string
     * form of its handler, one per line.
     * @override
     */
    public toString(): string {
      let str = '';
      for (const config of Array.from(this.configuration.handlers.keys())) {
        str += config + ': ' +
          this.configuration.handlers.get(config as HandlerType) + '\n';
      }
      return str;
    }

    /**
     * Parses the current input string: reads one code point at a time,
     * advances past it and hands it to the 'character' handlers.
     */
    public Parse() {
      let c: string;
      while (this.i < this.string.length) {
        c = this.getCodePoint();
        // Advance by the UTF-16 length so surrogate pairs are skipped whole.
        this.i += c.length;
        this.parse('character', [this, c]);
      }
    }

    /**
     * Pushes a new item onto the stack.  The item can also be a Mml node,
     * but if the mml item is an inferred row, push its children instead.
     * @param {StackItem|MmlNode} arg The new item.
     */
    public Push(arg: StackItem | MmlNode) {
      if (arg instanceof AbstractMmlNode && arg.isInferred) {
        this.PushAll(arg.childNodes);
      } else {
        this.stack.Push(arg);
      }
    }

    /**
     * Pushes a list of new items onto the stack, in order.
     * @param {(StackItem|MmlNode)[]} args The new items.
     */
    public PushAll(args: (StackItem | MmlNode)[]) {
      for (const arg of args) {
        this.stack.Push(arg);
      }
    }

    /**
     * @return {MmlNode} The internal Mathml structure, i.e. the first node of
     *     the top stack item, or null if that item is not of kind 'mml'.
     */
    public mml(): MmlNode {
      if (!this.stack.Top().isKind('mml')) {
        // NOTE(review): the parser is not popped from the configuration on
        // this path -- confirm that this is what callers expect.
        return null;
      }
      const node = this.stack.Top().First;
      this.configuration.popParser();
      return node;
    }

    /************************************************************************
     *
     *   String handling routines
     */

    /**
     * Convert delimiter to character.
     * @param {string} c The delimiter name.
     * @return {string} The corresponding character, or null if the delimiter
     *     lookup yields nothing.
     */
    public convertDelimiter(c: string): string {
      const symbol = this.lookup('delimiter', c) as Symbol;
      return symbol ? symbol.char : null;
    }

    /**
     * @return {string} The next unicode character in the string (one or two
     *     UTF-16 units), or the empty string at the end of the input.  The
     *     position is not advanced.
     */
    public getCodePoint(): string {
      const code = this.string.codePointAt(this.i);
      return code === undefined ? '' : String.fromCodePoint(code);
    }

    /**
     * @return {boolean} True if the next character to parse is a space.
     */
    public nextIsSpace(): boolean {
      return !!this.string.charAt(this.i).match(/\s/);
    }

    /**
     * Skips any white space at the current position.
     * @return {string} The next non-space character (not consumed), or the
     *     empty string at the end of the input.
     */
    public GetNext(): string {
      while (this.nextIsSpace()) {
        this.i++;
      }
      return this.getCodePoint();
    }

    /**
     * Reads a control-sequence name at the current position, which is expected
     * to be just behind the backslash.  A name is either a run of letters
     * (one directly following space is consumed but not returned), a surrogate
     * pair, or a single other character.
     * @return {string} The control-sequence name; a single space if nothing
     *     matches at the current position.
     */
    public GetCS(): string {
      const CS = this.string.slice(this.i).match(/^(([a-z]+) ?|[\uD800-\uDBFF].|.)/i);
      if (CS) {
        this.i += CS[0].length;
        return CS[2] || CS[1];
      } else {
        this.i++;
        return ' ';
      }
    }

    /**
     * Get and return a TeX argument (either a single character or control
     * sequence, or the contents of the next set of braces).
     * @param {string} _name Name of the current control sequence (unused; the
     *     error message refers to currentCS).
     * @param {boolean} noneOK? True if no argument is OK.
     * @return {string} The next argument, or null if there is none and noneOK
     *     is set.
     * @throws {TexError} MissingArgFor, ExtraCloseMissingOpen or
     *     MissingCloseBrace.
     */
    public GetArgument(_name: string, noneOK?: boolean): string {
      switch (this.GetNext()) {
      case '':
        if (!noneOK) {
          // @test MissingArgFor
          throw new TexError('MissingArgFor', 'Missing argument for %1', this.currentCS);
        }
        return null;
      case '}':
        if (!noneOK) {
          // @test ExtraCloseMissingOpen
          throw new TexError('ExtraCloseMissingOpen',
                             'Extra close brace or missing open brace');
        }
        return null;
      case '\\':
        this.i++;
        return '\\' + this.GetCS();
      case '{': {
        // Scan for the matching close brace; j marks the first character
        // behind the opening brace.
        const j = ++this.i;
        let parens = 1;
        while (this.i < this.string.length) {
          switch (this.string.charAt(this.i++)) {
          case '\\': this.i++; break;   // skip the escaped character
          case '{': parens++; break;
          case '}':
            if (--parens === 0) {
              return this.string.slice(j, this.i - 1);
            }
            break;
          }
        }
        // @test MissingCloseBrace
        throw new TexError('MissingCloseBrace', 'Missing close brace');
      }
      }
      // Any other character is an argument by itself.
      const c = this.getCodePoint();
      this.i += c.length;
      return c;
    }

    /**
     * Get an optional LaTeX argument in brackets.
     * @param {string} _name Name of the current control sequence (unused; the
     *     error message refers to currentCS).
     * @param {string} def? The default value for the optional argument.
     * @return {string} The optional argument, or def if the next non-space
     *     character is not an opening bracket.
     * @throws {TexError} ExtraCloseLooking or MissingCloseBracket.
     */
    public GetBrackets(_name: string, def?: string): string {
      if (this.GetNext() !== '[') {
        return def;
      }
      const j = ++this.i;
      let parens = 0;
      while (this.i < this.string.length) {
        switch (this.string.charAt(this.i++)) {
        case '{': parens++; break;
        case '\\': this.i++; break;   // skip the escaped character
        case '}':
          if (parens-- <= 0) {
            // @test ExtraCloseLooking1
            throw new TexError('ExtraCloseLooking',
                               'Extra close brace while looking for %1', '\']\'');
          }
          break;
        case ']':
          // Only a bracket outside of all braces closes the argument.
          if (parens === 0) {
            return this.string.slice(j, this.i - 1);
          }
          break;
        }
      }
      // @test MissingCloseBracket
      throw new TexError('MissingCloseBracket',
                         'Could not find closing \']\' for argument to %1', this.currentCS);
    }

    /**
     * Get the name of a delimiter (check it in the delimiter list).
     * @param {string} name Name of the current control sequence.
     * @param {boolean} braceOK? Are braces around the delimiter OK.
     * @return {string} The character the delimiter converts to.
     * @throws {TexError} MissingOrUnrecognizedDelim.
     */
    public GetDelimiter(name: string, braceOK?: boolean): string {
      let c = this.GetNext();
      this.i += c.length;
      // NOTE(review): i is advanced by the length of what was read, so this
      // bound looks like it always holds -- kept as in the original.
      if (this.i <= this.string.length) {
        if (c === '\\') {
          c += this.GetCS();
        } else if (c === '{' && braceOK) {
          // Step back onto the brace so GetArgument reads the whole group.
          this.i--;
          c = this.GetArgument(name).trim();
        }
        if (this.contains('delimiter', c)) {
          return this.convertDelimiter(c);
        }
      }
      // @test MissingOrUnrecognizedDelim1, MissingOrUnrecognizedDelim2
      throw new TexError('MissingOrUnrecognizedDelim',
                         'Missing or unrecognized delimiter for %1', this.currentCS);
    }

    /**
     * Get a dimension (including its units), either from a braced argument or
     * directly from the text at the current position.
     * @param {string} name Name of the current control sequence.
     * @return {string} The dimension string.
     * @throws {TexError} MissingDimOrUnits.
     */
    public GetDimen(name: string): string {
      if (this.GetNext() === '{') {
        const dimen = this.GetArgument(name);
        const [value, unit] = ParseUtil.matchDimen(dimen);
        if (value) {
          // @test Raise In Line, Lower 2, (Raise|Lower) Negative
          return value + unit;
        }
      } else {
        // @test Above, Raise, Lower, Modulo, Above With Delims
        const dimen = this.string.slice(this.i);
        const [value, unit, length] = ParseUtil.matchDimen(dimen, true);
        if (value) {
          // Consume what matchDimen reports as the matched length.
          this.i += length;
          return value + unit;
        }
      }
      // @test MissingDimOrUnits
      throw new TexError('MissingDimOrUnits',
                         'Missing dimension or its units for %1', this.currentCS);
    }

    /**
     * Get everything up to the given control sequence (token).  The token is
     * only recognised outside of braces and is consumed as well.
     * @param {string} _name Name of the current control sequence (unused; the
     *     error message refers to currentCS).
     * @param {string} token The element until where to parse.
     * @return {string} The text between the current position and the given token.
     * @throws {TexError} ExtraCloseLooking or TokenNotFoundForCommand.
     */
    public GetUpTo(_name: string, token: string): string {
      while (this.nextIsSpace()) {
        this.i++;
      }
      const j = this.i;
      let parens = 0;
      while (this.i < this.string.length) {
        // k is where the current token starts (before any skipped space).
        const k = this.i;
        let c = this.GetNext();
        this.i += c.length;
        switch (c) {
        case '\\': c += this.GetCS(); break;
        case '{': parens++; break;
        case '}':
          if (parens === 0) {
            // @test ExtraCloseLooking2
            throw new TexError('ExtraCloseLooking',
                               'Extra close brace while looking for %1', token);
          }
          parens--;
          break;
        }
        if (parens === 0 && c === token) {
          return this.string.slice(j, k);
        }
      }
      // @test TokenNotFoundForCommand
      throw new TexError('TokenNotFoundForCommand',
                         'Could not find %1 for %2', token, this.currentCS);
    }

    /**
     * Parse the arguments of a control sequence in a new parser instance.
     * @param {string} name Name of the current control sequence.
     * @return {MmlNode} The parsed node.
     */
    public ParseArg(name: string): MmlNode {
      return new TexParser(this.GetArgument(name), this.stack.env,
                           this.configuration).mml();
    }

    /**
     * Parses a given string up to a given token in a new parser instance.
     * @param {string} name Name of the current control sequence.
     * @param {string} token A Token at which to end parsing.
     * @return {MmlNode} The parsed node.
     */
    public ParseUpTo(name: string, token: string): MmlNode {
      return new TexParser(this.GetUpTo(name, token), this.stack.env,
                           this.configuration).mml();
    }

    /**
     * Get a delimiter or empty argument.
     * @param {string} name Name of the current control sequence.
     * @return {string} The delimiter name, or null for an empty argument.
     * @throws {TexError} MissingOrUnrecognizedDelim.
     */
    public GetDelimiterArg(name: string): string {
      const c = ParseUtil.trimSpaces(this.GetArgument(name));
      if (c === '') {
        return null;
      }
      if (this.contains('delimiter', c)) {
        return c;
      }
      // @test MissingOrUnrecognizedDelim
      throw new TexError('MissingOrUnrecognizedDelim',
                         'Missing or unrecognized delimiter for %1', this.currentCS);
    }

    /**
     * Consumes a star if it is the next non-space character.
     * @return {boolean} True if a star follows the control sequence name.
     */
    public GetStar(): boolean {
      const star = (this.GetNext() === '*');
      if (star) {
        this.i++;
      }
      return star;
    }

    /**
     * Convenience method to create nodes with the node factory of the current
     * configuration.
     * @param {string} kind The kind of node to create.
     * @param {any[]} ...rest The remaining arguments for the creation method.
     * @return {MmlNode} The newly created node.
     */
    public create(kind: string, ...rest: any[]): MmlNode {
      return this.configuration.nodeFactory.create(kind, ...rest);
    }

  }