semantic_mathml.js 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. import * as DomUtil from '../common/dom_util.js';
  2. import { SemanticFont, SemanticRole, SemanticType } from './semantic_meaning.js';
  3. import { SemanticMap } from './semantic_attr.js';
  4. import { SemanticAbstractParser } from './semantic_parser.js';
  5. import * as SemanticPred from './semantic_pred.js';
  6. import { SemanticProcessor } from './semantic_processor.js';
  7. import * as SemanticUtil from './semantic_util.js';
  8. import { MMLTAGS } from '../semantic_tree/semantic_util.js';
  9. import { SemanticHeuristics } from './semantic_heuristic_factory.js';
  /**
   * Parser that turns a MathML DOM tree into semantic nodes.
   *
   * Every supported MathML tag is mapped to a handler method in `parseMap_`;
   * elements whose tag has no handler are passed to `dummy_`. Handlers take
   * the element and an array of its child nodes and return a semantic node
   * built through the parser's node factory or the shared SemanticProcessor.
   */
  export class SemanticMathml extends SemanticAbstractParser {
    /**
     * Reads an attribute from an element with a fallback.
     *
     * @param {Element} node The element to read from.
     * @param {string} attr The attribute name.
     * @param {string} def Value returned when the attribute is absent.
     * @returns {?string} `def` if the attribute is absent, `null` if it is
     *     present but empty or whitespace only, otherwise the attribute value.
     */
    static getAttribute_(node, attr, def) {
      if (!node.hasAttribute(attr)) {
        return def;
      }
      const value = node.getAttribute(attr);
      if (value.match(/^\s*$/)) {
        return null;
      }
      return value;
    }

    /**
     * Creates the parser, builds the tag-to-handler dispatch table and
     * registers a default meaning (identifier / number set / double-struck)
     * for a fixed list of number-set letters in the factory's default map.
     */
    constructor() {
      super('MathML');
      this.parseMap_ = new Map([
        [MMLTAGS.SEMANTICS, this.semantics_.bind(this)],
        [MMLTAGS.MATH, this.rows_.bind(this)],
        [MMLTAGS.MROW, this.rows_.bind(this)],
        [MMLTAGS.MPADDED, this.rows_.bind(this)],
        [MMLTAGS.MSTYLE, this.rows_.bind(this)],
        [MMLTAGS.MFRAC, this.fraction_.bind(this)],
        [MMLTAGS.MSUB, this.limits_.bind(this)],
        [MMLTAGS.MSUP, this.limits_.bind(this)],
        [MMLTAGS.MSUBSUP, this.limits_.bind(this)],
        [MMLTAGS.MOVER, this.limits_.bind(this)],
        [MMLTAGS.MUNDER, this.limits_.bind(this)],
        [MMLTAGS.MUNDEROVER, this.limits_.bind(this)],
        [MMLTAGS.MROOT, this.root_.bind(this)],
        [MMLTAGS.MSQRT, this.sqrt_.bind(this)],
        [MMLTAGS.MTABLE, this.table_.bind(this)],
        [MMLTAGS.MLABELEDTR, this.tableLabeledRow_.bind(this)],
        [MMLTAGS.MTR, this.tableRow_.bind(this)],
        [MMLTAGS.MTD, this.tableCell_.bind(this)],
        [MMLTAGS.MS, this.text_.bind(this)],
        [MMLTAGS.MTEXT, this.text_.bind(this)],
        [MMLTAGS.MSPACE, this.space_.bind(this)],
        [MMLTAGS.ANNOTATIONXML, this.text_.bind(this)],
        [MMLTAGS.MI, this.identifier_.bind(this)],
        [MMLTAGS.MN, this.number_.bind(this)],
        [MMLTAGS.MO, this.operator_.bind(this)],
        [MMLTAGS.MFENCED, this.fenced_.bind(this)],
        [MMLTAGS.MENCLOSE, this.enclosed_.bind(this)],
        [MMLTAGS.MMULTISCRIPTS, this.multiscripts_.bind(this)],
        [MMLTAGS.ANNOTATION, this.empty_.bind(this)],
        [MMLTAGS.NONE, this.empty_.bind(this)],
        [MMLTAGS.MACTION, this.action_.bind(this)]
      ]);
      // One shared meaning object is registered for every symbol below.
      const meaning = {
        type: SemanticType.IDENTIFIER,
        role: SemanticRole.NUMBERSET,
        font: SemanticFont.DOUBLESTRUCK
      };
      [
        'C',
        'H',
        'N',
        'P',
        'Q',
        'R',
        'Z',
        'ℂ',
        'ℍ',
        'ℕ',
        'ℙ',
        'ℚ',
        'ℝ',
        'ℤ'
      ].forEach((x) => this.getFactory().defaultMap.set(x, meaning));
    }

    /**
     * Parses a single MathML element into a semantic node.
     *
     * Installs this parser's factory on the shared SemanticProcessor,
     * dispatches on the element's tag name and copies attributes onto the
     * result via `SemanticUtil.addAttributes`. For the container-like tags
     * listed below the handler result is returned as is; for every other tag
     * the element is prepended to the node's `mathml` list and recorded as its
     * `mathmlTree`.
     *
     * @param {Element} mml The MathML element.
     * @returns {Object} The semantic node for the element.
     */
    parse(mml) {
      SemanticProcessor.getInstance().setNodeFactory(this.getFactory());
      const children = DomUtil.toArray(mml.childNodes);
      const tag = DomUtil.tagName(mml);
      const func = this.parseMap_.get(tag);
      const newNode = (func ? func : this.dummy_.bind(this))(mml, children);
      SemanticUtil.addAttributes(newNode, mml);
      if ([
        MMLTAGS.MATH,
        MMLTAGS.MROW,
        MMLTAGS.MPADDED,
        MMLTAGS.MSTYLE,
        MMLTAGS.SEMANTICS,
        MMLTAGS.MACTION
      ].indexOf(tag) !== -1) {
        return newNode;
      }
      newNode.mathml.unshift(mml);
      newNode.mathmlTree = mml;
      return newNode;
    }

    /**
     * Handles `semantics`: parses only the first child, or yields an empty
     * node when there are no children.
     *
     * @param {Element} _node The semantics element (unused).
     * @param {Array} children Its child nodes.
     * @returns {Object} The resulting semantic node.
     */
    semantics_(_node, children) {
      return children.length
        ? this.parse(children[0])
        : this.getFactory().makeEmptyNode();
    }

    /**
     * Handles row-like containers (`math`, `mrow`, `mpadded`, `mstyle`).
     *
     * Elements whose `semantics` attribute contains `bspr_` are delegated to
     * `SemanticProcessor.proof`. Otherwise the children are purged with
     * `SemanticUtil.purgeNodes`; a single remaining child is parsed directly,
     * while several children are either replaced by the result of the
     * `function_from_identifiers` heuristic or combined into a row.
     *
     * @param {Element} node The container element.
     * @param {Array} children Its child nodes.
     * @returns {Object} The resulting semantic node.
     */
    rows_(node, children) {
      const semantics = node.getAttribute('semantics');
      if (semantics && semantics.match('bspr_')) {
        return SemanticProcessor.proof(node, semantics, this.parseList.bind(this));
      }
      children = SemanticUtil.purgeNodes(children);
      let newNode;
      if (children.length === 1) {
        newNode = this.parse(children[0]);
        // An empty result with no source element is attributed to the container.
        if (newNode.type === SemanticType.EMPTY && !newNode.mathmlTree) {
          newNode.mathmlTree = node;
        }
      }
      else {
        const snode = SemanticHeuristics.run('function_from_identifiers', node);
        // The heuristic result is only used if it produced something new.
        newNode =
          snode && snode !== node
            ? snode
            : SemanticProcessor.getInstance().row(this.parseList(children));
      }
      newNode.mathml.unshift(node);
      return newNode;
    }

    /**
     * Handles `mfrac`. Without children an empty node is returned; a missing
     * denominator is replaced by an empty node.
     *
     * @param {Element} node The fraction element; its `linethickness` and
     *     `bevelled` attributes are forwarded to the processor.
     * @param {Array} children Numerator and denominator.
     * @returns {Object} The resulting semantic node.
     */
    fraction_(node, children) {
      if (!children.length) {
        return this.getFactory().makeEmptyNode();
      }
      const upper = this.parse(children[0]);
      const lower = children[1]
        ? this.parse(children[1])
        : this.getFactory().makeEmptyNode();
      const sem = SemanticProcessor.getInstance().fractionLikeNode(upper, lower, node.getAttribute('linethickness'), node.getAttribute('bevelled') === 'true');
      return sem;
    }

    /**
     * Handles the script and limit elements (`msub`, `msup`, `msubsup`,
     * `mover`, `munder`, `munderover`) by passing the tag name and parsed
     * children to `limitNode`.
     *
     * @param {Element} node The script element.
     * @param {Array} children Its child nodes.
     * @returns {Object} The resulting semantic node.
     */
    limits_(node, children) {
      return SemanticProcessor.getInstance().limitNode(DomUtil.tagName(node), this.parseList(children));
    }

    /**
     * Handles `mroot`. Without a second child it is treated like `msqrt`;
     * otherwise a ROOT branch is built with the second child placed before
     * the first.
     *
     * @param {Element} node The root element.
     * @param {Array} children Its child nodes.
     * @returns {Object} The resulting semantic node.
     */
    root_(node, children) {
      if (!children[1]) {
        return this.sqrt_(node, children);
      }
      return this.getFactory().makeBranchNode(SemanticType.ROOT, [this.parse(children[1]), this.parse(children[0])], []);
    }

    /**
     * Handles `msqrt`: the purged children are combined into one row that
     * becomes the single child of a SQRT branch.
     *
     * @param {Element} _node The sqrt element (unused).
     * @param {Array} children Its child nodes.
     * @returns {Object} The resulting semantic node.
     */
    sqrt_(_node, children) {
      const semNodes = this.parseList(SemanticUtil.purgeNodes(children));
      return this.getFactory().makeBranchNode(SemanticType.SQRT, [SemanticProcessor.getInstance().row(semNodes)], []);
    }

    /**
     * Handles `mtable`. Tables whose `semantics` attribute contains `bspr_`
     * are delegated to `SemanticProcessor.proof`; all others become a TABLE
     * branch that is passed through `SemanticProcessor.tableToMultiline`.
     *
     * @param {Element} node The table element.
     * @param {Array} children Its child nodes.
     * @returns {Object} The resulting semantic node.
     */
    table_(node, children) {
      const semantics = node.getAttribute('semantics');
      if (semantics && semantics.match('bspr_')) {
        return SemanticProcessor.proof(node, semantics, this.parseList.bind(this));
      }
      const newNode = this.getFactory().makeBranchNode(SemanticType.TABLE, this.parseList(children), []);
      newNode.mathmlTree = node;
      return SemanticProcessor.tableToMultiline(newNode);
    }

    /**
     * Handles `mtr`: builds a ROW branch with role TABLE from the children.
     *
     * @param {Element} _node The row element (unused).
     * @param {Array} children Its child nodes.
     * @returns {Object} The resulting semantic node.
     */
    tableRow_(_node, children) {
      const newNode = this.getFactory().makeBranchNode(SemanticType.ROW, this.parseList(children), []);
      newNode.role = SemanticRole.TABLE;
      return newNode;
    }

    /**
     * Handles `mlabeledtr`. The first child is parsed as the label and stored
     * as content of the row; the remaining children are the row's cells. A row
     * without children is treated like a plain `mtr`.
     *
     * @param {Element} node The labelled row element.
     * @param {Array} children Its child nodes.
     * @returns {Object} The resulting semantic node.
     */
    tableLabeledRow_(node, children) {
      if (!children.length) {
        return this.tableRow_(node, children);
      }
      const label = this.parse(children[0]);
      label.role = SemanticRole.LABEL;
      // A text node directly inside the label is marked as label as well.
      const labelContent = label.childNodes[0];
      if (labelContent !== null &&
        labelContent !== undefined &&
        labelContent.type === SemanticType.TEXT) {
        labelContent.role = SemanticRole.LABEL;
      }
      const newNode = this.getFactory().makeBranchNode(SemanticType.ROW, this.parseList(children.slice(1)), [label]);
      newNode.role = SemanticRole.TABLE;
      return newNode;
    }

    /**
     * Handles `mtd`. The cell has no children if nothing remains after
     * purging, keeps a lone EMPTY node as is, and otherwise wraps the parsed
     * children into a single row.
     *
     * @param {Element} _node The cell element (unused).
     * @param {Array} children Its child nodes.
     * @returns {Object} A CELL branch with role TABLE.
     */
    tableCell_(_node, children) {
      const semNodes = this.parseList(SemanticUtil.purgeNodes(children));
      let childNodes;
      if (!semNodes.length) {
        childNodes = [];
      }
      else if (semNodes.length === 1 &&
        SemanticPred.isType(semNodes[0], SemanticType.EMPTY)) {
        childNodes = semNodes;
      }
      else {
        childNodes = [SemanticProcessor.getInstance().row(semNodes)];
      }
      const newNode = this.getFactory().makeBranchNode(SemanticType.CELL, childNodes, []);
      newNode.role = SemanticRole.TABLE;
      return newNode;
    }

    /**
     * Handles `mspace`. The `width` attribute is split into a number and a
     * trailing lower-case unit; the space is only kept (as a text node) when
     * the unit is known and the number reaches the per-unit threshold.
     * Otherwise an empty node is returned.
     *
     * @param {Element} node The space element.
     * @param {Array} children Its child nodes.
     * @returns {Object} A text node for a significant space, else an empty
     *     node.
     */
    space_(node, children) {
      const width = node.getAttribute('width');
      const match = width && width.match(/[a-z]*$/);
      if (!match) {
        return this.empty_(node, children);
      }
      // A Map is used so that unit strings such as "constructor" cannot
      // resolve to members inherited from Object.prototype.
      const sizes = new Map([
        ['cm', 0.4],
        ['pc', 0.5],
        ['em', 0.5],
        ['ex', 1],
        ['in', 0.15],
        ['pt', 5],
        ['mm', 5]
      ]);
      const unit = match[0];
      const measure = parseFloat(width.slice(0, match.index));
      const size = sizes.get(unit);
      if (!size || Number.isNaN(measure) || measure < size) {
        return this.empty_(node, children);
      }
      const newNode = this.getFactory().makeUnprocessed(node);
      return SemanticProcessor.getInstance().text(newNode, DomUtil.tagName(node));
    }

    /**
     * Handles text-like elements (`ms`, `mtext`, `annotation-xml`). Elements
     * without text content are returned as the plain leaf.
     *
     * @param {Element} node The text element.
     * @param {Array} children Its child nodes.
     * @returns {Object} The resulting semantic node.
     */
    text_(node, children) {
      const newNode = this.leaf_(node, children);
      if (!node.textContent) {
        return newNode;
      }
      newNode.updateContent(node.textContent, true);
      return SemanticProcessor.getInstance().text(newNode, DomUtil.tagName(node));
    }

    /**
     * Handles `mi`: builds a leaf and hands it to `identifierNode` together
     * with the font derived from `mathvariant` and the `class` attribute.
     *
     * @param {Element} node The identifier element.
     * @param {Array} children Its child nodes.
     * @returns {Object} The resulting semantic node.
     */
    identifier_(node, children) {
      const newNode = this.leaf_(node, children);
      return SemanticProcessor.getInstance().identifierNode(newNode, SemanticProcessor.getInstance().font(node.getAttribute('mathvariant')), node.getAttribute('class'));
    }

    /**
     * Handles `mn`: builds a leaf and passes it to `SemanticProcessor.number`.
     *
     * @param {Element} node The number element.
     * @param {Array} children Its child nodes.
     * @returns {Object} The leaf node.
     */
    number_(node, children) {
      const newNode = this.leaf_(node, children);
      SemanticProcessor.number(newNode);
      return newNode;
    }

    /**
     * Handles `mo`: builds a leaf and passes it to `operatorNode`.
     *
     * @param {Element} node The operator element.
     * @param {Array} children Its child nodes.
     * @returns {Object} The leaf node.
     */
    operator_(node, children) {
      const newNode = this.leaf_(node, children);
      SemanticProcessor.getInstance().operatorNode(newNode);
      return newNode;
    }

    /**
     * Handles `mfenced`. The `separators`, `open` and `close` attributes
     * default to `,`, `(` and `)` when absent and are `null` when blank.
     *
     * @param {Element} node The fenced element.
     * @param {Array} children Its child nodes.
     * @returns {Object} The first node returned by `tablesInRow` for the
     *     fenced node.
     */
    fenced_(node, children) {
      const semNodes = this.parseList(SemanticUtil.purgeNodes(children));
      const sepValue = SemanticMathml.getAttribute_(node, 'separators', ',');
      const open = SemanticMathml.getAttribute_(node, 'open', '(');
      const close = SemanticMathml.getAttribute_(node, 'close', ')');
      const newNode = SemanticProcessor.getInstance().mfenced(open, close, sepValue, semNodes);
      const nodes = SemanticProcessor.getInstance().tablesInRow([newNode]);
      return nodes[0];
    }

    /**
     * Handles `menclose`: wraps the row of parsed children into an ENCLOSE
     * branch whose role is the `notation` attribute, or UNKNOWN if that is
     * missing or empty.
     *
     * @param {Element} node The enclose element.
     * @param {Array} children Its child nodes.
     * @returns {Object} The resulting semantic node.
     */
    enclosed_(node, children) {
      const semNodes = this.parseList(SemanticUtil.purgeNodes(children));
      const newNode = this.getFactory().makeBranchNode(SemanticType.ENCLOSE, [SemanticProcessor.getInstance().row(semNodes)], []);
      newNode.role =
        node.getAttribute('notation') || SemanticRole.UNKNOWN;
      return newNode;
    }

    /**
     * Handles `mmultiscripts`. The first child is the base; the following
     * children alternate between subscript and superscript, first for the
     * right side and, after an `mprescripts` marker, for the left side.
     * The `children` array is not modified.
     *
     * @param {Element} _node The multiscripts element (unused).
     * @param {Array} children Its child nodes.
     * @returns {Object} An empty node without children, the bare base without
     *     scripts, a pseudo tensor when no left scripts remain after purging,
     *     otherwise a tensor.
     */
    multiscripts_(_node, children) {
      if (!children.length) {
        return this.getFactory().makeEmptyNode();
      }
      const [baseChild, ...scripts] = children;
      const base = this.parse(baseChild);
      if (!scripts.length) {
        return base;
      }
      const lsup = [];
      const lsub = [];
      const rsup = [];
      const rsub = [];
      let prescripts = false;
      let scriptcount = 0;
      for (const child of scripts) {
        if (DomUtil.tagName(child) === MMLTAGS.MPRESCRIPTS) {
          // Everything after the marker belongs to the left side; the
          // sub/sup alternation starts over.
          prescripts = true;
          scriptcount = 0;
          continue;
        }
        // Even positions are subscripts, odd positions superscripts.
        const isSuper = (scriptcount & 1) === 1;
        if (prescripts) {
          (isSuper ? lsup : lsub).push(child);
        }
        else {
          (isSuper ? rsup : rsub).push(child);
        }
        scriptcount++;
      }
      if (!SemanticUtil.purgeNodes(lsup).length &&
        !SemanticUtil.purgeNodes(lsub).length) {
        return SemanticProcessor.getInstance().pseudoTensor(base, this.parseList(rsub), this.parseList(rsup));
      }
      return SemanticProcessor.getInstance().tensor(base, this.parseList(lsub), this.parseList(lsup), this.parseList(rsub), this.parseList(rsup));
    }

    /**
     * Handler for elements that carry no semantic content (`annotation`,
     * `none`, insignificant spaces).
     *
     * @param {Element} _node The element (unused).
     * @param {Array} _children Its child nodes (unused).
     * @returns {Object} A fresh empty node.
     */
    empty_(_node, _children) {
      return this.getFactory().makeEmptyNode();
    }

    /**
     * Handles `maction` by parsing the selected child only.
     *
     * The 1-based `selection` attribute picks the child; when it is absent,
     * not a number or out of range, the first child is used instead of
     * failing. An `maction` without children yields an empty node.
     *
     * @param {Element} node The action element.
     * @param {Array} children Its child nodes.
     * @returns {Object} The semantic node of the selected child, with that
     *     child recorded as its `mathmlTree`.
     */
    action_(node, children) {
      if (!children.length) {
        return this.getFactory().makeEmptyNode();
      }
      const index = node.hasAttribute('selection')
        ? parseInt(node.getAttribute('selection'), 10) - 1
        : 0;
      // children[NaN] and out-of-range indices are undefined: use child one.
      const selection = children[index] || children[0];
      const stree = this.parse(selection);
      stree.mathmlTree = selection;
      return stree;
    }

    /**
     * Fallback for tags without a handler: an unprocessed node whose role is
     * the element's tag name and which carries the element's text content.
     *
     * @param {Element} node The unknown element.
     * @param {Array} _children Its child nodes (unused).
     * @returns {Object} The unprocessed node.
     */
    dummy_(node, _children) {
      const unknown = this.getFactory().makeUnprocessed(node);
      unknown.role = node.tagName;
      unknown.textContent = node.textContent;
      return unknown;
    }

    /**
     * Builds the node for a token element.
     *
     * A token whose only child is not a text node becomes an unprocessed node
     * that takes its role and attributes from that child. Otherwise a leaf is
     * created from the text content and the `mathvariant` font; if the element
     * has a `data-latex` attribute, its value is recorded in
     * `SemanticMap.LatexCommands` against the text content.
     *
     * @param {Element} mml The token element.
     * @param {Array} children Its child nodes.
     * @returns {Object} The leaf or unprocessed node.
     */
    leaf_(mml, children) {
      if (children.length === 1 &&
        children[0].nodeType !== DomUtil.NodeType.TEXT_NODE) {
        const node = this.getFactory().makeUnprocessed(mml);
        node.role = children[0].tagName;
        SemanticUtil.addAttributes(node, children[0]);
        return node;
      }
      const node = this.getFactory().makeLeafNode(mml.textContent, SemanticProcessor.getInstance().font(mml.getAttribute('mathvariant')));
      if (mml.hasAttribute('data-latex')) {
        SemanticMap.LatexCommands.set(mml.getAttribute('data-latex'), mml.textContent);
      }
      return node;
    }
  }