enrich_mathml.js 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543
  1. import { Debugger } from '../common/debugger.js';
  2. import * as DomUtil from '../common/dom_util.js';
  3. import { Engine } from '../common/engine.js';
  4. import { NamedSymbol } from '../semantic_tree/semantic_attr.js';
  5. import { SemanticRole, SemanticType } from '../semantic_tree/semantic_meaning.js';
  6. import { SemanticHeuristics } from '../semantic_tree/semantic_heuristic_factory.js';
  7. import { SemanticSkeleton } from '../semantic_tree/semantic_skeleton.js';
  8. import * as SemanticUtil from '../semantic_tree/semantic_util.js';
  9. import { MMLTAGS } from '../semantic_tree/semantic_util.js';
  10. import * as EnrichAttr from './enrich_attr.js';
  11. import { getCase } from './enrich_case.js';
  /**
   * Module-level switches for the enrichment pass.
   *  - collapsed: not read by any code visible in this file.
   *  - implicit:  read by cloneContentNode; when true a synthetic <mo> is
   *               created for content nodes without MathML, otherwise an mrow.
   *  - wiki:      passed to formattedOutput to wrap debug dumps in a fenced block.
   */
  const SETTINGS = {
    collapsed: true,
    implicit: true,
    wiki: true,
  };

  /**
   * Semantic node ids that already had their attributes written by walkTree.
   * Cleared at the start of every enrich() call.
   */
  const IDS = new Map();
  /**
   * Enriches a MathML element in place with semantic information.
   *
   * @param mml The MathML element; it is mutated and returned.
   * @param semantic The semantic tree whose `root` is walked.
   * @returns The same `mml` element.
   */
  export function enrich(mml, semantic) {
    IDS.clear();
    // Snapshot taken before any mutation; only used for the debug dump below.
    const untouched = DomUtil.cloneNode(mml);
    walkTree(semantic.root);
    if (Engine.getInstance().structure) {
      const skeleton = SemanticSkeleton.fromStructure(mml, semantic);
      mml.setAttribute(EnrichAttr.Attribute.STRUCTURE, skeleton.toString());
    }
    const dump = () => [
      formattedOutput(untouched, 'Original MathML', SETTINGS.wiki),
      formattedOutput(semantic, 'Semantic Tree', SETTINGS.wiki),
      formattedOutput(mml, 'Semantically enriched MathML', SETTINGS.wiki)
    ];
    Debugger.getInstance().generateOutput(dump);
    return mml;
  }
  /**
   * Walks a semantic node, attaching semantic attributes to the matching
   * MathML and merging the enriched children into it.
   *
   * @param semantic The semantic node to process.
   * @returns The MathML node for `semantic`, after ascendNewNode was applied.
   */
  export function walkTree(semantic) {
    const trace = (text) => Debugger.getInstance().output(text);
    // Shared exit path: log the end marker, then climb via ascendNewNode.
    const finish = (result) => {
      trace('WALKING END: ' + semantic.toString());
      return ascendNewNode(result);
    };

    trace('WALKING START: ' + semantic.toString());

    // Special cases produce their MathML themselves.
    const special = getCase(semantic);
    if (special) {
      return finish(special.getMathml());
    }

    if (semantic.mathml.length === 1) {
      trace('Walktree Case 0');
      if (!semantic.childNodes.length) {
        // Leaf: annotate the single MathML node directly.
        trace('Walktree Case 0.1');
        const leaf = semantic.mathml[0];
        EnrichAttr.setAttributes(leaf, semantic);
        return finish(leaf);
      }
      const onlyChild = semantic.childNodes[0];
      if (semantic.childNodes.length === 1 &&
          onlyChild.type === SemanticType.EMPTY) {
        // Single empty child: append its walked MathML to the node.
        trace('Walktree Case 0.2');
        const holder = semantic.mathml[0];
        EnrichAttr.setAttributes(holder, semantic);
        holder.appendChild(walkTree(onlyChild));
        return finish(holder);
      }
      // Children without MathML get a synthetic invisible operator.
      semantic.childNodes.forEach((kid) => {
        if (!kid.mathml.length) {
          kid.mathml = [createInvisibleOperator(kid)];
        }
      });
    }

    const enrichedContent = semantic.contentNodes.map((c) => cloneContentNode(c));
    setOperatorAttribute(semantic, enrichedContent);
    const enrichedChildren = semantic.childNodes.map((c) => walkTree(c));
    const merged = SemanticSkeleton.combineContentChildren(
      semantic.type, semantic.role, enrichedContent, enrichedChildren);

    let target = semantic.mathmlTree;
    if (target === null) {
      trace('Walktree Case 1');
      target = introduceNewLayer(merged, semantic);
    } else {
      const anchored = attachedElement(merged);
      trace('Walktree Case 2');
      if (anchored) {
        trace('Walktree Case 2.1');
        target = parentNode(anchored);
      } else {
        trace('Walktree Case 2.2');
        target = getInnerNode(target);
      }
    }

    target = rewriteMfenced(target);
    mergeChildren(target, merged, semantic);
    // Attributes are written at most once per semantic id within one enrich run.
    if (!IDS.has(semantic.id)) {
      IDS.set(semantic.id, true);
      EnrichAttr.setAttributes(target, semantic);
    }
    return finish(target);
  }
  /**
   * Finds or creates the MathML node that should hold the given children.
   *
   * The least common ancestor reported by mathmlLca is reused only when it is
   * valid, has an empty tag, and is attached whenever the semantic node has a
   * parent. Otherwise a new mrow is introduced.
   *
   * @param children MathML nodes of the semantic node's children/content.
   * @param semantic The semantic node; its `mathmlTree` is set if still unset.
   * @returns The node chosen as the layer.
   */
  export function introduceNewLayer(children, semantic) {
    const { node: lcaNode, type: lcaKind } = mathmlLca(children);
    let layer = lcaNode;
    const reusable = lcaKind === lcaType.VALID &&
      SemanticUtil.hasEmptyTag(layer) &&
      (layer.parentNode || !semantic.parent);
    if (!reusable) {
      Debugger.getInstance().output('Walktree Case 1.1');
      layer = EnrichAttr.addMrow();
      if (lcaKind === lcaType.PRUNED) {
        Debugger.getInstance().output('Walktree Case 1.1.0');
        layer = introduceLayerAboveLca(layer, lcaNode, children);
      } else if (children[0]) {
        Debugger.getInstance().output('Walktree Case 1.1.1');
        const anchor = attachedElement(children);
        if (anchor) {
          // Put the new mrow where the anchor was, then move the covered
          // sibling range into it.
          const covered = childrenSubset(parentNode(anchor), children);
          DomUtil.replaceNode(anchor, layer);
          const mrow = layer;
          for (const item of covered) {
            mrow.appendChild(item);
          }
        } else {
          // Nothing is attached: the first child itself becomes the layer.
          moveSemanticAttributes(layer, children[0]);
          layer = children[0];
        }
      }
    }
    if (!semantic.mathmlTree) {
      semantic.mathmlTree = layer;
    }
    return layer;
  }
  /**
   * Inserts `mrow` as a layer around the node found by descending from `lca`.
   * If that node has a math tag, the roles are swapped: its semantic
   * attributes and children move into `mrow`, and the math node stays outermost.
   *
   * @param mrow The freshly created row element.
   * @param lca The least common ancestor reported for `children`.
   * @param children Child list; the entry equal to `lca` is replaced in place.
   * @returns The outermost node after insertion.
   */
  function introduceLayerAboveLca(mrow, lca, children) {
    let outer = mrow;
    let inner = descendNode(lca);
    if (SemanticUtil.hasMathTag(inner)) {
      Debugger.getInstance().output('Walktree Case 1.1.0.0');
      moveSemanticAttributes(inner, outer);
      for (const kid of DomUtil.toArray(inner.childNodes)) {
        outer.appendChild(kid);
      }
      // Swap roles so the math element remains the outer node.
      [outer, inner] = [inner, outer];
    }
    children[children.indexOf(lca)] = inner;
    DomUtil.replaceNode(inner, outer);
    outer.appendChild(inner);
    children.forEach((kid) => {
      outer.appendChild(kid);
    });
    return outer;
  }
  /**
   * Moves every attribute listed in EnrichAttr.EnrichAttributes that is set on
   * `oldNode` over to `newNode`, removing it from `oldNode`.
   *
   * @param oldNode Element the attributes are taken from.
   * @param newNode Element the attributes are written to.
   */
  function moveSemanticAttributes(oldNode, newNode) {
    for (const name of EnrichAttr.EnrichAttributes) {
      if (!oldNode.hasAttribute(name)) {
        continue;
      }
      const value = oldNode.getAttribute(name);
      newNode.setAttribute(name, value);
      oldNode.removeAttribute(name);
    }
  }
  /**
   * Returns the contiguous run of `node`'s children spanning from the first to
   * the last child that also occurs in `newChildren`.
   *
   * @param node Parent element whose child list is sliced.
   * @param newChildren Nodes to look up among the children of `node`.
   * @returns The covered children; empty when none of them is found.
   */
  function childrenSubset(node, newChildren) {
    const siblings = DomUtil.toArray(node.childNodes);
    const hits = newChildren
      .map((candidate) => siblings.indexOf(candidate))
      .filter((position) => position !== -1);
    // With no hits the bounds stay at +/-Infinity, which slices to [].
    let first = +Infinity;
    let last = -Infinity;
    for (const position of hits) {
      first = Math.min(first, position);
      last = Math.max(last, position);
    }
    return siblings.slice(first, last + 1);
  }
  /**
   * Builds the list of "old" children for an implicit node by flattening
   * intermediate wrappers under `node`.
   *
   * Wrappers that expand into several relevant nodes are replaced by those
   * nodes. Every such wrapper after the first is marked with the
   * 'AuxiliaryImplicit' attribute, which insertNewChild checks and removes.
   * Trailing semantic children that are in `children` but were not collected
   * are appended at the end.
   *
   * @param node The MathML node being merged into.
   * @param children The new children list.
   * @param semantic The semantic node being processed.
   * @returns The collated list of existing children.
   */
  function collateChildNodes(node, children, semantic) {
    const collated = [];
    let pending = DomUtil.toArray(node.childNodes);
    let expandedBefore = false;
    while (pending.length) {
      const current = pending.shift();
      if (current.hasAttribute(EnrichAttr.Attribute.TYPE)) {
        collated.push(current);
        continue;
      }
      const nested = collectChildNodes(current, children);
      if (nested.length === 0) {
        continue;
      }
      if (nested.length === 1) {
        collated.push(current);
        continue;
      }
      if (expandedBefore) {
        current.setAttribute('AuxiliaryImplicit', true);
      } else {
        expandedBefore = true;
      }
      pending = nested.concat(pending);
    }

    // Scan the semantic children from the back, stopping at the first one
    // that is already part of the collated list.
    const trailing = [];
    const trees = semantic.childNodes.map((kid) => kid.mathmlTree);
    for (let i = trees.length - 1; i >= 0; i--) {
      const tree = trees[i];
      if (!tree) {
        continue;
      }
      if (collated.indexOf(tree) !== -1) {
        break;
      }
      if (children.indexOf(tree) !== -1) {
        trailing.unshift(tree);
      }
    }
    return collated.concat(trailing);
  }
  /**
   * Collects, in document order, the nearest element descendants of `node`
   * that either carry the semantic type attribute or are in `children`.
   * Matching elements are not descended into; non-element nodes are skipped.
   *
   * @param node Root of the search (not itself tested).
   * @param children Nodes that count as matches.
   * @returns The matching descendants.
   */
  function collectChildNodes(node, children) {
    const found = [];
    const visit = (parent) => {
      for (const kid of DomUtil.toArray(parent.childNodes)) {
        if (kid.nodeType !== DomUtil.NodeType.ELEMENT_NODE) {
          continue;
        }
        const matches = kid.hasAttribute(EnrichAttr.Attribute.TYPE) ||
          children.indexOf(kid) !== -1;
        if (matches) {
          found.push(kid);
        } else {
          visit(kid);
        }
      }
    };
    visit(node);
    return found;
  }
  /**
   * Merges the list of new children into the existing children of `node`,
   * inserting those that are not yet in place.
   *
   * `newChildren` is consumed (emptied) by this function.
   *
   * @param node The MathML node to merge into.
   * @param newChildren Enriched child nodes in their intended order.
   * @param semantic The semantic node; its role selects how the existing
   *     children are determined.
   */
  function mergeChildren(node, newChildren, semantic) {
    if (newChildren.length === 0) {
      return;
    }
    if (newChildren.length === 1 && newChildren[0] === node) {
      return;
    }
    const collate = semantic.role === SemanticRole.IMPLICIT &&
      SemanticHeuristics.flags.combine_juxtaposition;
    const oldChildren = collate
      ? collateChildNodes(node, newChildren, semantic)
      : DomUtil.toArray(node.childNodes);
    if (oldChildren.length === 0) {
      newChildren.forEach((fresh) => {
        node.appendChild(fresh);
      });
      return;
    }
    let target = node;
    let cursor = 0;
    while (newChildren.length) {
      const fresh = newChildren[0];
      const existing = oldChildren[cursor];
      // Already in place, or a function application that replaces the old one.
      if (existing === fresh || functionApplication(existing, fresh)) {
        newChildren.shift();
        cursor++;
        continue;
      }
      // Existing child that is not among the new ones: step over it.
      if (existing && newChildren.indexOf(existing) === -1) {
        cursor++;
        continue;
      }
      // New child already sits somewhere below the target.
      if (isDescendant(fresh, target)) {
        newChildren.shift();
        continue;
      }
      if (!existing) {
        // Ran out of existing children.
        if (fresh.parentNode) {
          target = parentNode(fresh);
          newChildren.shift();
          continue;
        }
        const following = newChildren[1];
        if (following && following.parentNode) {
          target = parentNode(following);
          target.insertBefore(fresh, following);
          newChildren.splice(0, 2);
          continue;
        }
        target.insertBefore(fresh, null);
        newChildren.shift();
        continue;
      }
      insertNewChild(target, existing, fresh);
      newChildren.shift();
    }
  }
  /**
   * Inserts `newChild` in front of `oldChild`, or in front of the highest
   * ancestor of `oldChild` reachable through a first-child chain.
   *
   * The climb stops when the parent is missing, the current node is not its
   * parent's first child, the current node carries 'AuxiliaryImplicit', or
   * the parent is `node`.
   *
   * @param node Upper bound for the climb.
   * @param oldChild Existing child used as the starting point.
   * @param newChild Node to insert.
   */
  function insertNewChild(node, oldChild, newChild) {
    let anchor = oldChild;
    let container = parentNode(anchor);
    for (;;) {
      if (!container || container.firstChild !== anchor) {
        break;
      }
      if (anchor.hasAttribute('AuxiliaryImplicit') || container === node) {
        break;
      }
      anchor = container;
      container = parentNode(anchor);
    }
    if (!container) {
      return;
    }
    container.insertBefore(newChild, anchor);
    anchor.removeAttribute('AuxiliaryImplicit');
  }
  /**
   * Tests whether `node` appears on the parent chain of `child`.
   * `child` itself is not compared, only its ancestors.
   *
   * @param child Node whose ancestors are inspected; falsy yields false.
   * @param node The ancestor to look for.
   * @returns True if some ancestor of `child` is strictly equal to `node`.
   */
  function isDescendant(child, node) {
    let ancestor = child;
    if (!ancestor) {
      return false;
    }
    for (;;) {
      ancestor = parentNode(ancestor);
      if (ancestor === node) {
        return true;
      }
      if (!ancestor) {
        return false;
      }
    }
  }
  /**
   * Detects the case where both nodes hold the function application symbol
   * and `newNode` is marked as added. In that case the attributes of
   * `oldNode` that `newNode` lacks are copied over and `oldNode` is replaced
   * by `newNode` in the DOM.
   *
   * @param oldNode Existing node (may be undefined).
   * @param newNode Newly created node (may be undefined).
   * @returns True if the replacement was carried out.
   */
  function functionApplication(oldNode, newNode) {
    const appl = NamedSymbol.functionApplication;
    if (!oldNode || !newNode) {
      return false;
    }
    if (!oldNode.textContent || !newNode.textContent) {
      return false;
    }
    if (oldNode.textContent !== appl || newNode.textContent !== appl) {
      return false;
    }
    if (newNode.getAttribute(EnrichAttr.Attribute.ADDED) !== 'true') {
      return false;
    }
    // Keep attributes of the original operator that the new one lacks.
    for (let i = 0; ; i++) {
      const attr = oldNode.attributes[i];
      if (!attr) {
        break;
      }
      if (!newNode.hasAttribute(attr.nodeName)) {
        newNode.setAttribute(attr.nodeName, attr.nodeValue);
      }
    }
    DomUtil.replaceNode(oldNode, newNode);
    return true;
  }
  /**
   * Classification of the least common ancestor computed by mathmlLca.
   * Frozen so the shared constant cannot be modified by accident.
   */
  const lcaType = Object.freeze({
    VALID: 'valid',
    INVALID: 'invalid',
    PRUNED: 'pruned'
  });
  /**
   * Computes the least common ancestor of the attached nodes in `children`.
   *
   * @param children MathML nodes, some of which may be detached.
   * @returns An object `{type, node}` where `type` is an lcaType value and
   *     `node` is the ancestor (null when the type is INVALID and nothing was
   *     found).
   */
  function mathmlLca(children) {
    const first = attachedElement(children);
    if (!first) {
      return { type: lcaType.INVALID, node: null };
    }
    const last = attachedElement(children.slice().reverse());
    if (first === last) {
      return { type: lcaType.VALID, node: first };
    }
    const fullLeftPath = pathToRoot(first);
    const leftPath = prunePath(fullLeftPath, children);
    // Climb from the right-most node until the (pruned) left path is hit.
    const rightPath = pathToRoot(last, (x) => leftPath.indexOf(x) !== -1);
    const lca = rightPath[0];
    const position = leftPath.indexOf(lca);
    if (position === -1) {
      return { type: lcaType.INVALID, node: null };
    }
    let type;
    if (leftPath.length !== fullLeftPath.length) {
      type = lcaType.PRUNED;
    } else if (validLca(leftPath[position + 1], rightPath[1])) {
      type = lcaType.VALID;
    } else {
      type = lcaType.INVALID;
    }
    return { type, node: lca };
  }
  /**
   * Cuts `path` right after its first element that is falsy or contained in
   * `children`. When there is no such element a full copy is returned.
   *
   * @param path List of nodes.
   * @param children Nodes that terminate the path.
   * @returns A new array holding the leading part of `path`.
   */
  function prunePath(path, children) {
    const cut = path.findIndex(
      (step) => !step || children.indexOf(step) !== -1);
    return cut === -1 ? path.slice() : path.slice(0, cut + 1);
  }
  /**
   * Returns the first entry of `nodes` that has a truthy `parentNode`.
   *
   * @param nodes List of nodes to scan in order.
   * @returns The first attached node, or null if there is none.
   */
  function attachedElement(nodes) {
    for (let i = 0; i < nodes.length; i++) {
      const candidate = nodes[i];
      if (candidate.parentNode) {
        return candidate;
      }
    }
    return null;
  }
  /**
   * Collects the chain of ancestors of `node`, outermost first, ending with
   * `node` itself. Climbing stops at a node that satisfies the optional test,
   * has a math tag, or has no parent.
   *
   * @param node Starting node.
   * @param opt_test Optional predicate that stops the climb when it holds.
   * @returns The path from the highest node reached down to `node`.
   */
  function pathToRoot(node, opt_test) {
    const stop = opt_test || ((_x) => false);
    const path = [node];
    let current = node;
    for (;;) {
      if (stop(current) || SemanticUtil.hasMathTag(current) || !current.parentNode) {
        break;
      }
      current = parentNode(current);
      path.unshift(current);
    }
    return path;
  }
  /**
   * Checks that both nodes exist, `left` has no previous sibling and `right`
   * has no next sibling.
   *
   * @param left Node on the left path.
   * @param right Node on the right path.
   * @returns True only if all of the above hold.
   */
  function validLca(left, right) {
    if (!left || !right) {
      return false;
    }
    return !left.previousSibling && !right.nextSibling;
  }
  /**
   * Climbs from `newNode` to its parent for as long as the current node has
   * no math tag and unitChild holds for it.
   *
   * @param newNode Starting node.
   * @returns The highest node reached.
   */
  export function ascendNewNode(newNode) {
    let current = newNode;
    for (;;) {
      if (SemanticUtil.hasMathTag(current) || !unitChild(current)) {
        return current;
      }
      current = parentNode(current);
    }
  }
  /**
   * Descends from `node` through single relevant children: while exactly one
   * element child without an ignore tag remains, and that child has an empty
   * tag and no semantic type attribute, continue from that child.
   *
   * @param node Starting node.
   * @returns The innermost node reached.
   */
  function descendNode(node) {
    let current = node;
    for (;;) {
      const children = DomUtil.toArray(current.childNodes);
      if (!children) {
        return current;
      }
      const relevant = children.filter((kid) =>
        kid.nodeType === DomUtil.NodeType.ELEMENT_NODE &&
        !SemanticUtil.hasIgnoreTag(kid));
      if (relevant.length !== 1) {
        return current;
      }
      const only = relevant[0];
      if (!SemanticUtil.hasEmptyTag(only) ||
          only.hasAttribute(EnrichAttr.Attribute.TYPE)) {
        return current;
      }
      current = only;
    }
  }
  /**
   * Tests whether `node` is the only non-ignorable child of a parent that has
   * an empty tag.
   *
   * @param node The node to test.
   * @returns False if there is no parent or the parent has no empty tag;
   *     otherwise whether every sibling is ignorable.
   */
  function unitChild(node) {
    const parent = parentNode(node);
    if (parent && SemanticUtil.hasEmptyTag(parent)) {
      const siblings = DomUtil.toArray(parent.childNodes);
      return !siblings.some(
        (sibling) => sibling !== node && !isIgnorable(sibling));
    }
    return false;
  }
  /**
   * Decides whether a node can be disregarded when looking for the relevant
   * children of an element.
   *
   * A node is ignorable if it is missing, is not an element, or has an ignore
   * tag. It is not ignorable if it is a non-empty-tag element with children,
   * has a display tag, carries a semantic type attribute, or is an orphaned
   * glyph. Otherwise it is ignorable exactly when all its children are.
   *
   * @param node The node to test; null/undefined is treated as ignorable.
   * @returns True if the node can be ignored.
   */
  function isIgnorable(node) {
    // The missing-node guard must run before any property access on `node`.
    if (!node || node.nodeType !== DomUtil.NodeType.ELEMENT_NODE) {
      return true;
    }
    if (SemanticUtil.hasIgnoreTag(node)) {
      return true;
    }
    const children = DomUtil.toArray(node.childNodes);
    if ((!SemanticUtil.hasEmptyTag(node) && children.length) ||
        SemanticUtil.hasDisplayTag(node) ||
        node.hasAttribute(EnrichAttr.Attribute.TYPE) ||
        SemanticUtil.isOrphanedGlyph(node)) {
      return false;
    }
    return children.every(isIgnorable);
  }
  /**
   * Typed-access helper returning the parent of a node.
   *
   * @param element The node.
   * @returns The value of `element.parentNode`.
   */
  function parentNode(element) {
    const { parentNode: parent } = element;
    return parent;
  }
  /**
   * Writes the collapsed attribute on `node`, using the string form of a
   * SemanticSkeleton built from `collapsed`.
   *
   * @param node Element receiving the attribute.
   * @param collapsed Structure handed to the SemanticSkeleton constructor.
   */
  export function addCollapsedAttribute(node, collapsed) {
    const serialized = new SemanticSkeleton(collapsed).toString();
    node.setAttribute(EnrichAttr.Attribute.COLLAPSED, serialized);
  }
  /**
   * Produces the MathML node for a content node. Content that already has
   * MathML is walked; otherwise a new node is created (an invisible operator
   * when SETTINGS.implicit is set, else an mrow) and stored as its MathML.
   *
   * @param content The semantic content node.
   * @returns The MathML node for the content.
   */
  export function cloneContentNode(content) {
    if (content.mathml.length) {
      return walkTree(content);
    }
    let clone;
    if (SETTINGS.implicit) {
      clone = createInvisibleOperator(content);
    } else {
      clone = EnrichAttr.addMrow();
    }
    content.mathml = [clone];
    return clone;
  }
  /**
   * Replaces an mfenced element by an mrow that holds the same children and
   * all attributes except 'open', 'close' and 'separators'. Any other
   * element is returned unchanged.
   *
   * @param mml The element to inspect.
   * @returns The replacement mrow, or `mml` if it is not an mfenced element.
   */
  export function rewriteMfenced(mml) {
    if (DomUtil.tagName(mml) !== MMLTAGS.MFENCED) {
      return mml;
    }
    const mrow = EnrichAttr.addMrow();
    const fenceOnly = ['open', 'close', 'separators'];
    for (let i = 0; ; i++) {
      const attr = mml.attributes[i];
      if (!attr) {
        break;
      }
      if (fenceOnly.indexOf(attr.name) === -1) {
        mrow.setAttribute(attr.name, attr.value);
      }
    }
    for (const kid of DomUtil.toArray(mml.childNodes)) {
      mrow.appendChild(kid);
    }
    DomUtil.replaceNode(mml, mrow);
    return mrow;
  }
  /**
   * Creates an <mo> element for a semantic operator that has no MathML:
   * the operator's text becomes its content, the semantic attributes are
   * set, and the element is flagged as added.
   *
   * @param operator The semantic operator node.
   * @returns The new <mo> element.
   */
  function createInvisibleOperator(operator) {
    const mo = DomUtil.createElement('mo');
    mo.appendChild(DomUtil.createTextNode(operator.textContent));
    EnrichAttr.setAttributes(mo, operator);
    mo.setAttribute(EnrichAttr.Attribute.ADDED, 'true');
    return mo;
  }
  /**
   * Sets the operator attribute on the inner node of every content element.
   * The value is the semantic type, followed by a comma and the text content
   * when the latter is non-empty.
   *
   * @param semantic The semantic node supplying type and text.
   * @param content MathML nodes of the content.
   */
  export function setOperatorAttribute(semantic, content) {
    let operator = semantic.type;
    operator += semantic.textContent ? ',' + semantic.textContent : '';
    for (const item of content) {
      getInnerNode(item).setAttribute(EnrichAttr.Attribute.OPERATOR, operator);
    }
  }
  /**
   * Finds the single relevant node inside `node`, looking through empty-tag
   * wrappers (unless they are typed as punctuation). If the search does not
   * end in exactly one candidate, `node` itself is returned.
   *
   * @param node The node to look into.
   * @returns The unique inner node, or `node`.
   */
  export function getInnerNode(node) {
    const children = DomUtil.toArray(node.childNodes);
    if (!children) {
      return node;
    }
    const relevant = children.filter((kid) => !isIgnorable(kid));
    const candidates = [];
    for (const kid of relevant) {
      const lookInside = SemanticUtil.hasEmptyTag(kid) &&
        kid.getAttribute(EnrichAttr.Attribute.TYPE) !== SemanticType.PUNCTUATION;
      if (!lookInside) {
        candidates.push(kid);
        continue;
      }
      // A wrapper only contributes when it resolves to a different node.
      const inner = getInnerNode(kid);
      if (inner && inner !== kid) {
        candidates.push(inner);
      }
    }
    return candidates.length === 1 ? candidates[0] : node;
  }
  /**
   * Renders an element for debug output: its string form is formatted as XML
   * and passed through EnrichAttr.removeAttributePrefix.
   *
   * @param element Object whose `toString()` yields the markup.
   * @param name Caption used in wiki mode.
   * @param wiki If true, the result is prefixed with the caption and wrapped
   *     in a fenced html block.
   * @returns The formatted text.
   */
  function formattedOutput(element, name, wiki = false) {
    const output = EnrichAttr.removeAttributePrefix(
      DomUtil.formatXml(element.toString()));
    if (!wiki) {
      return output;
    }
    const header = name + ':\n```html\n';
    const footer = '\n```\n';
    return header + output + footer;
  }
  /**
   * Walks all children of a punctuated semantic node and builds the id
   * structure `[id, ['c', ...contentIds], ...childIds]`.
   *
   * Each walked MathML child is pushed onto `opt_children` when that list is
   * given. When it is not given and the semantic node has a parent, the
   * parent attribute of each child's inner node is set to the parent's id.
   *
   * @param semantic The punctuated semantic node.
   * @param opt_children Optional list collecting the walked MathML children.
   * @returns The nested id structure.
   */
  export function collapsePunctuated(semantic, opt_children) {
    const collecting = !!opt_children;
    const collected = opt_children || [];
    const { parent } = semantic;
    const contentIds = ['c', ...semantic.contentNodes.map((c) => c.id)];
    const ids = [semantic.id, contentIds];
    for (let i = 0; ; i++) {
      const kid = semantic.childNodes[i];
      if (!kid) {
        break;
      }
      const mmlKid = walkTree(kid);
      collected.push(mmlKid);
      const inner = getInnerNode(mmlKid);
      if (parent && !collecting) {
        inner.setAttribute(EnrichAttr.Attribute.PARENT, parent.id.toString());
      }
      ids.push(kid.id);
    }
    return ids;
  }