FindTeX.ts 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. /*************************************************************
  2. *
  3. * Copyright (c) 2017-2022 The MathJax Consortium
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /**
  18. * @fileoverview Implements the TeX version of the FindMath object
  19. *
  20. * @author dpvc@mathjax.org (Davide Cervone)
  21. */
  22. import {AbstractFindMath} from '../../core/FindMath.js';
  23. import {OptionList} from '../../util/Options.js';
  24. import {sortLength, quotePattern} from '../../util/string.js';
  25. import {ProtoItem, protoItem} from '../../core/MathItem.js';
  /**
   * Shorthand types for data about end delimiters and delimiter pairs
   */

  /**
   * Data describing how to finish a piece of math once its opening delimiter
   * has been seen, as a positional tuple:
   *
   *   [0] the literal close-delimiter text that a match is compared against
   *   [1] true when the delimiters denote display math, false for in-line math
   *   [2] the (global) regular expression used to scan forward for the close
   *       delimiter
   */
  export type EndItem = [string, boolean, RegExp];

  /**
   * A delimiter pair, as [open delimiter, close delimiter]
   */
  export type Delims = [string, string];
  31. /*****************************************************************/
  32. /*
  33. * Implements the FindTeX class (extends AbstractFindMath)
  34. *
  35. * Locates TeX expressions within strings
  36. */
  37. /*
  38. * @template N The HTMLElement node class
  39. * @template T The Text node class
  40. * @template D The Document class
  41. */
  /**
   * Locates TeX expressions within strings, using the configured in-line and
   * display delimiter pairs and, when enabled by the options, \begin{env}...\end{env}
   * environments, \ref{...} and \eqref{...} references, and the escapes \\ and \$.
   *
   * @template N  The HTMLElement node class
   * @template T  The Text node class
   * @template D  The Document class
   */
  export class FindTeX<N, T, D> extends AbstractFindMath<N, T, D> {

    /**
     * @type {OptionList}
     */
    public static OPTIONS: OptionList = {
      inlineMath: [              // The start/end delimiter pairs for in-line math
        // ['$', '$'],           //  (comment out any you don't want, or add your own, but
        ['\\(', '\\)']           //  be sure that you don't have an extra comma at the end)
      ],
      displayMath: [             // The start/end delimiter pairs for display math
        ['$$', '$$'],            //  (comment out any you don't want, or add your own, but
        ['\\[', '\\]']           //  be sure that you don't have an extra comma at the end)
      ],
      processEscapes: true,      // set to true to allow \$ to produce a dollar without
                                 //   starting in-line math mode
      processEnvironments: true, // set to true to process \begin{xxx}...\end{xxx} outside
                                 //   of math mode, false to prevent that
      processRefs: true,         // set to true to process \ref{...} outside of math mode
    };

    /**
     * The regular expression for any starting delimiter
     */
    protected start: RegExp;

    /**
     * The end-delimiter data keyed to the opening delimiter string
     */
    protected end: {[name: string]: EndItem};

    /**
     * False if the configuration has no delimiters (so search can be skipped), true otherwise
     */
    protected hasPatterns: boolean;

    /**
     * The index of the \begin...\end pattern in the regex match array
     * (0 when environments are not being processed)
     */
    protected env: number;

    /**
     * The index of the \ref and escaped character patterns in the regex match array
     * (0 when neither references nor escapes are being processed)
     */
    protected sub: number;

    /**
     * @override
     */
    constructor(options: OptionList) {
      super(options);
      this.getPatterns();
    }

    /**
     * Create the patterns needed for searching the strings for TeX
     *   based on the configuration options
     *
     * Sets this.start, this.end, this.env, this.sub, and this.hasPatterns.
     */
    protected getPatterns() {
      const options = this.options;
      const starts: string[] = [];
      const parts: string[] = [];
      const subparts: string[] = [];
      this.end = {};
      this.env = this.sub = 0;
      //
      //  The number of the next capture group in the combined start pattern.
      //  (This assumes the quoted delimiter strings add no capture groups of
      //   their own -- TODO confirm against quotePattern.)
      //
      let i = 1;
      options['inlineMath'].forEach((delims: Delims) => this.addPattern(starts, delims, false));
      options['displayMath'].forEach((delims: Delims) => this.addPattern(starts, delims, true));
      if (starts.length) {
        //
        //  All the opening delimiters form the first alternative, ordered by sortLength
        //
        parts.push(starts.sort(sortLength).join('|'));
      }
      if (options['processEnvironments']) {
        //
        //  One capture group: the environment name
        //
        parts.push('\\\\begin\\s*\\{([^}]*)\\}');
        this.env = i;
        i++;
      }
      if (options['processEscapes']) {
        subparts.push('\\\\([\\\\$])');
      }
      if (options['processRefs']) {
        subparts.push('(\\\\(?:eq)?ref\\s*\\{[^}]*\\})');
      }
      if (subparts.length) {
        //
        //  The escapes and references share one enclosing group, whose
        //  number is what this.sub records
        //
        parts.push('(' + subparts.join('|') + ')');
        this.sub = i;
      }
      this.start = new RegExp(parts.join('|'), 'g');
      this.hasPatterns = (parts.length > 0);
    }

    /**
     * Add the needed patterns for a pair of delimiters
     *
     * @param {string[]} starts  Array of starting delimiter strings
     * @param {Delims} delims    Array of delimiter strings, as [start, end]
     * @param {boolean} display  True if the delimiters are for display mode
     */
    protected addPattern(starts: string[], delims: Delims, display: boolean) {
      const [open, close] = delims;
      starts.push(quotePattern(open));
      this.end[open] = [close, display, this.endPattern(close)];
    }

    /**
     * Create the pattern for a close delimiter
     *
     * The resulting expression also matches a backslash followed by any single
     * character, and individual braces, so that the search for the close
     * delimiter can step over control sequences and keep track of brace nesting.
     *
     * @param {string} end   The end delimiter text
     * @param {string} endp  The end delimiter pattern (overrides the literal end pattern)
     * @return {RegExp}      The regular expression for the end delimiter
     */
    protected endPattern(end: string, endp?: string): RegExp {
      return new RegExp((endp || quotePattern(end)) + '|\\\\(?:[a-zA-Z]|.)|[{}]', 'g');
    }

    /**
     * Search for the end delimiter given the start delimiter,
     *   skipping braced groups, and control sequences that aren't
     *   the close delimiter.
     *
     * @param {string} text            The string being searched for the end delimiter
     * @param {number} n               The index of the string being searched
     * @param {RegExpExecArray} start  The result array from the start-delimiter search
     * @param {EndItem} end            The end-delimiter data corresponding to the start delimiter
     * @return {ProtoItem<N,T>}        The proto math item for the math, if found (null otherwise)
     */
    protected findEnd(text: string, n: number, start: RegExpExecArray, end: EndItem): ProtoItem<N, T> {
      const [close, display, pattern] = end;
      //
      //  Begin scanning just past the opening delimiter
      //
      const i = pattern.lastIndex = start.index + start[0].length;
      let match: RegExpExecArray | null;
      let braces = 0;
      while ((match = pattern.exec(text))) {
        //
        //  Use the captured group when the pattern has one (as the \end{...}
        //  pattern does), otherwise the whole match; the close delimiter
        //  only counts when it is not inside a braced group.
        //
        if ((match[1] || match[0]) === close && braces === 0) {
          return protoItem<N, T>(start[0], text.slice(i, match.index), match[0],
                                 n, start.index, match.index + match[0].length, display);
        } else if (match[0] === '{') {
          braces++;
        } else if (match[0] === '}' && braces) {
          braces--;
        }
      }
      return null;
    }

    /**
     * Search a string for math delimited by one of the delimiter pairs,
     *   or by \begin{env}...\end{env}, or \eqref{...}, \ref{...}, \\, or \$.
     *
     * @param {ProtoItem[]} math  The array of proto math items located so far
     * @param {number} n          The index of the string being searched
     * @param {string} text       The string being searched
     */
    protected findMathInString(math: ProtoItem<N, T>[], n: number, text: string) {
      let start: RegExpExecArray | null;
      this.start.lastIndex = 0;
      while ((start = this.start.exec(text))) {
        let match: ProtoItem<N, T> | null;
        if (this.env && start[this.env] !== undefined) {
          //
          //  A \begin{env}: look for the corresponding \end{env}, and make the
          //  \begin and \end part of the math itself rather than its delimiters
          //
          const env = start[this.env];
          const end = '\\\\end\\s*(\\{' + quotePattern(env) + '\\})';
          match = this.findEnd(text, n, start, ['{' + env + '}', true, this.endPattern(null, end)]);
          if (match) {
            match.math = match.open + match.math + match.close;
            match.open = match.close = '';
          }
        } else if (this.sub && start[this.sub] !== undefined) {
          //
          //  An escaped character or a reference
          //
          const sub = start[this.sub];
          const end = start.index + sub.length;
          if (sub.length === 2) {
            //
            //  A backslash plus one character (\\ or \$): the item is the character alone
            //
            match = protoItem<N, T>('', sub.slice(1), '', n, start.index, end);
          } else {
            //
            //  A \ref{...} or \eqref{...}, kept in full as non-display math
            //
            match = protoItem<N, T>('', sub, '', n, start.index, end, false);
          }
        } else {
          //
          //  One of the configured opening delimiters
          //
          match = this.findEnd(text, n, start, this.end[start[0]]);
        }
        if (match) {
          math.push(match);
          //
          //  Continue the search after the end of the math just found
          //
          this.start.lastIndex = match.end.n;
        }
      }
    }

    /**
     * Search for math in an array of strings and return an array of matches.
     *
     * @override
     */
    public findMath(strings: string[]): ProtoItem<N, T>[] {
      const math: ProtoItem<N, T>[] = [];
      if (this.hasPatterns) {
        for (let i = 0, m = strings.length; i < m; i++) {
          this.findMathInString(math, i, strings[i]);
        }
      }
      return math;
    }

  }