semantic_heuristics.js 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. import { Debugger } from '../common/debugger.js';
  2. import { Engine } from '../common/engine.js';
  3. import { SemanticMap, NamedSymbol } from './semantic_attr.js';
  4. import { SemanticHeuristics } from './semantic_heuristic_factory.js';
  5. import { SemanticTreeHeuristic, SemanticMmlHeuristic, SemanticMultiHeuristic } from './semantic_heuristic.js';
  6. import { SemanticRole, SemanticType } from './semantic_meaning.js';
  7. import * as SemanticPred from './semantic_pred.js';
  8. import { SemanticProcessor } from './semantic_processor.js';
  9. import * as SemanticUtil from './semantic_util.js';
  10. import { SemanticSkeleton } from './semantic_skeleton.js';
  11. import { MMLTAGS } from '../semantic_tree/semantic_util.js';
  12. import * as DomUtil from '../common/dom_util.js';
  // Register `combineJuxtaposition` as the tree heuristic named
  // 'combine_juxtaposition'. No applicability predicate is supplied here.
  SemanticHeuristics.add(
    new SemanticTreeHeuristic('combine_juxtaposition', combineJuxtaposition)
  );
  /**
   * Flattens nested implicit operations into their parent.
   *
   * Every child of `root` that `SemanticPred.isImplicitOp` accepts and that is
   * not flagged `nobreaking` is replaced in place by its own children; its
   * content nodes are inserted into `root.contentNodes` at the same index, all
   * moved nodes are re-parented to `root`, and the child's MathML nodes are
   * handed to `root.addMathmlNodes`.
   *
   * @param {object} root Node whose `childNodes`/`contentNodes` are mutated.
   * @returns {object} The same `root` node.
   */
  function combineJuxtaposition(root) {
    // Walk from the last child to the first so that splicing in extra
    // elements never shifts an index that is still to be visited.
    for (let index = root.childNodes.length - 1; index >= 0; index--) {
      const nested = root.childNodes[index];
      if (!nested) {
        break;
      }
      const mergeable = SemanticPred.isImplicitOp(nested) && !nested.nobreaking;
      if (!mergeable) {
        continue;
      }
      root.childNodes.splice(index, 1, ...nested.childNodes);
      root.contentNodes.splice(index, 0, ...nested.contentNodes);
      for (const moved of [...nested.childNodes, ...nested.contentNodes]) {
        moved.parent = root;
      }
      root.addMathmlNodes(nested.mathml);
    }
    return root;
  }
  // Tree heuristic 'propagateSimpleFunction': an infix operation or fraction
  // whose children all satisfy SemanticPred.isSimpleFunction gets the COMPFUNC
  // role. Only applicable when the engine domain is 'clearspeak'.
  SemanticHeuristics.add(
    new SemanticTreeHeuristic(
      'propagateSimpleFunction',
      (node) => {
        const isCandidate =
          node.type === SemanticType.INFIXOP ||
          node.type === SemanticType.FRACTION;
        if (
          isCandidate &&
          node.childNodes.every(SemanticPred.isSimpleFunction)
        ) {
          node.role = SemanticRole.COMPFUNC;
        }
        return node;
      },
      (_node) => Engine.getInstance().domain === 'clearspeak'
    )
  );
  // Tree heuristic 'simpleNamedFunction': a node whose text is one of
  // f, g, h, F, G, H and whose role is not UNIT gets the SIMPLEFUNC role.
  // Only applicable when the engine domain is 'clearspeak'.
  SemanticHeuristics.add(
    new SemanticTreeHeuristic(
      'simpleNamedFunction',
      (node) => {
        const functionNames = ['f', 'g', 'h', 'F', 'G', 'H'];
        const isUnit = node.role === SemanticRole.UNIT;
        if (!isUnit && functionNames.includes(node.textContent)) {
          node.role = SemanticRole.SIMPLEFUNC;
        }
        return node;
      },
      (_node) => Engine.getInstance().domain === 'clearspeak'
    )
  );
  // Tree heuristic 'propagateComposedFunction': a fenced node whose first
  // child already has the COMPFUNC role inherits that role.
  // Only applicable when the engine domain is 'clearspeak'.
  SemanticHeuristics.add(
    new SemanticTreeHeuristic(
      'propagateComposedFunction',
      (node) => {
        if (node.type !== SemanticType.FENCED) {
          return node;
        }
        const inner = node.childNodes[0];
        if (inner.role === SemanticRole.COMPFUNC) {
          node.role = SemanticRole.COMPFUNC;
        }
        return node;
      },
      (_node) => Engine.getInstance().domain === 'clearspeak'
    )
  );
  // Tree heuristic 'multioperator': for a node with UNKNOWN role and more than
  // one character of text, asks SemanticProcessor.compSemantics to fill in the
  // node's role and type.
  //
  // The callback returns the node on every path, like the other tree
  // heuristics in this file, so a caller that uses the heuristic's result
  // never receives `undefined`.
  SemanticHeuristics.add(
    new SemanticTreeHeuristic('multioperator', (node) => {
      if (node.role !== SemanticRole.UNKNOWN || node.textContent.length <= 1) {
        return node;
      }
      SemanticProcessor.compSemantics(node, 'role', SemanticRole);
      SemanticProcessor.compSemantics(node, 'type', SemanticType);
      return node;
    })
  );
  // Multi heuristic 'convert_juxtaposition'.
  //
  // 1. Partitions the node list at invisible-times OPERATOR nodes and, when
  //    there is at least one, lets juxtapositionPrePost regroup the partition.
  // 2. Flattens the partition back into one list (component, operator,
  //    component, ...).
  // 3. Partitions again, this time also splitting at invisible-times INFIXOP
  //    nodes, and hands the result to recurseJuxtaposition.
  // If the second partition contains no operators the flattened list is
  // returned as is.
  SemanticHeuristics.add(
    new SemanticMultiHeuristic('convert_juxtaposition', (nodes) => {
      const isTimesOperator = (x) =>
        x.textContent === NamedSymbol.invisibleTimes &&
        x.type === SemanticType.OPERATOR;
      const isTimesOperatorOrInfix = (x) =>
        x.textContent === NamedSymbol.invisibleTimes &&
        (x.type === SemanticType.OPERATOR || x.type === SemanticType.INFIXOP);

      let partition = SemanticUtil.partitionNodes(nodes, isTimesOperator);
      if (partition.rel.length) {
        partition = juxtapositionPrePost(partition);
      }

      // Interleave components and operators again; stops at the first
      // missing operator.
      let flattened = partition.comp[0];
      let index = 1;
      let operator = partition.rel[0];
      while (operator) {
        flattened.push(operator);
        flattened = flattened.concat(partition.comp[index]);
        index += 1;
        operator = partition.rel[index - 1];
      }

      partition = SemanticUtil.partitionNodes(flattened, isTimesOperatorOrInfix);
      if (partition.rel.length === 0) {
        return flattened;
      }
      const seed = partition.comp.shift();
      return recurseJuxtaposition(seed, partition.rel, partition.comp);
    })
  );
  // Tree heuristic 'simple2prefix': an identifier with more than one character
  // whose first character is not an ASCII capital letter gets the PREFIXFUNC
  // role. Only applicable to IDENTIFIER nodes when the engine modality is
  // 'braille'.
  SemanticHeuristics.add(
    new SemanticTreeHeuristic(
      'simple2prefix',
      (node) => {
        const text = node.textContent;
        if (text.length > 1 && !/[A-Z]/.test(text[0])) {
          node.role = SemanticRole.PREFIXFUNC;
        }
        return node;
      },
      (node) =>
        Engine.getInstance().modality === 'braille' &&
        node.type === SemanticType.IDENTIFIER
    )
  );
  // Tree heuristic 'detect_cycle'.
  //
  // Applicable to a fenced node whose first child is an implicit infix
  // operation in which every child is a NUMBER and every content node has the
  // SPACE role. Such a node is retyped as a MATRIX with CYCLE role, and its
  // first child becomes a ROW with CYCLE role, empty text and no content nodes.
  SemanticHeuristics.add(
    new SemanticTreeHeuristic(
      'detect_cycle',
      (node) => {
        node.type = SemanticType.MATRIX;
        node.role = SemanticRole.CYCLE;
        const [row] = [node.childNodes[0]];
        row.type = SemanticType.ROW;
        row.role = SemanticRole.CYCLE;
        row.textContent = '';
        row.contentNodes = [];
        return node;
      },
      (node) => {
        if (node.type !== SemanticType.FENCED) {
          return false;
        }
        const inner = node.childNodes[0];
        return (
          inner.type === SemanticType.INFIXOP &&
          inner.role === SemanticRole.IMPLICIT &&
          inner.childNodes.every((x) => x.type === SemanticType.NUMBER) &&
          inner.contentNodes.every((x) => x.role === SemanticRole.SPACE)
        );
      }
    )
  );
  /**
   * Regroups a partition so that operators sitting next to empty components
   * are handed to `convertPrePost` instead of staying separators.
   *
   * The input partition is consumed: its `comp` and `rel` arrays are emptied
   * by `shift()` while walking through them.
   *
   * @param {{rel: Array, comp: Array<Array>}} partition Operators (`rel`) and
   *     the node lists between them (`comp`).
   * @returns {{rel: Array, comp: Array<Array>}} The regrouped partition.
   */
  function juxtapositionPrePost(partition) {
    const operators = [];
    const operands = [];
    let current = partition.comp.shift();
    // Operator waiting to be emitted between two non-empty components.
    let pending = null;
    // Operators gathered around a run of empty components.
    let run = [];
    while (partition.comp.length) {
      run = [];
      if (current.length) {
        // Non-empty component: emit it together with the operator before it.
        if (pending) {
          operators.push(pending);
        }
        operands.push(current);
        pending = partition.rel.shift();
        current = partition.comp.shift();
        continue;
      }
      if (pending) {
        run.push(pending);
      }
      // Skip over consecutive empty components, collecting their operators.
      while (!current.length && partition.comp.length) {
        current = partition.comp.shift();
        run.push(partition.rel.shift());
      }
      pending = convertPrePost(run, current, operands);
    }
    if (run.length || current.length) {
      operators.push(pending);
      operands.push(current);
    } else {
      // Trailing empty component: let convertPrePost deal with the last
      // operator using the previously emitted component.
      run.push(pending);
      convertPrePost(run, current, operands);
    }
    return { rel: operators, comp: operands };
  }
  /**
   * Turns a run of collected operators into a postfix node on the previous
   * component or a prefix node on the next component.
   *
   * - Both neighbours present: if the first node of `next` is an implicit
   *   infix operation, the last collected operator is kept as separator and
   *   the rest become a postfix on the previous node; otherwise the first
   *   collected operator is kept and the rest become a prefix on the next node.
   * - Only a previous component: everything becomes a postfix.
   * - Only a next component: everything becomes a prefix.
   *
   * `collect`, `next` and the last entry of `comps` are mutated in place.
   *
   * @param {Array} collect Operators to attach.
   * @param {Array} next Nodes following the operators (may be empty).
   * @param {Array<Array>} comps Components emitted so far.
   * @returns {?object} The operator kept as separator, or null.
   */
  function convertPrePost(collect, next, comps) {
    if (collect.length === 0) {
      return null;
    }
    const prev = comps[comps.length - 1];
    const hasPrev = Boolean(prev && prev.length);
    const hasNext = Boolean(next && next.length);
    const processor = SemanticProcessor.getInstance();

    // Wrap the last node of the previous component with the collected
    // operators as postfix.
    const appendPostfix = () => {
      const operand = prev.pop();
      prev.push(processor.postfixNode_(operand, collect));
    };
    // Wrap the first node of the next component with the collected operators
    // as prefix.
    const prependPrefix = () => {
      const operand = next.shift();
      next.unshift(processor.prefixNode_(operand, collect));
    };

    if (hasPrev && hasNext) {
      const head = next[0];
      if (
        head.type === SemanticType.INFIXOP &&
        head.role === SemanticRole.IMPLICIT
      ) {
        const separator = collect.pop();
        appendPostfix();
        return separator;
      }
      const separator = collect.shift();
      prependPrefix();
      return separator;
    }
    if (hasPrev) {
      appendPostfix();
    } else if (hasNext) {
      prependPrefix();
    }
    return null;
  }
  /**
   * Combines operands separated by juxtaposition operators into implicit
   * infix operations, working left to right.
   *
   * Each round takes the last accumulated node (`left`), the next operator
   * and the next element list, and handles one of the cases named in the
   * debugger output ('Juxta Heuristic Case 2' ... 'Case 9'). `acc`, `ops` and
   * `elements` are consumed/mutated in the process.
   *
   * @param {Array} acc Nodes accumulated so far.
   * @param {Array} ops Remaining operators.
   * @param {Array<Array>} elements Remaining operand lists, one per operator.
   * @returns {Array} The resulting node list.
   */
  function recurseJuxtaposition(acc, ops, elements) {
    const trace = (label) => Debugger.getInstance().output(label);
    let result = acc;
    while (ops.length) {
      const left = result.pop();
      const op = ops.shift();
      const first = elements.shift();

      // Case 2: the operator is already an implicit/unit infix operation;
      // keep it as an ordinary element of the list.
      const opIsInfix =
        op.type === SemanticType.INFIXOP &&
        (op.role === SemanticRole.IMPLICIT || op.role === SemanticRole.UNIT);
      if (opIsInfix) {
        trace('Juxta Heuristic Case 2');
        const head = left ? [left, op] : [op];
        result = result.concat(head.concat(first));
        continue;
      }

      // Case 3: nothing to the left of the operator.
      if (!left) {
        trace('Juxta Heuristic Case 3');
        result = [op].concat(first);
        continue;
      }

      const right = first.shift();

      // Case 9: nothing to the right; merge `left` with the following
      // operator into a new implicit operation and retry with it as operator.
      if (!right) {
        trace('Juxta Heuristic Case 9');
        const merged = SemanticHeuristics.factory.makeBranchNode(
          SemanticType.INFIXOP,
          [left, ops.shift()],
          [op],
          op.textContent
        );
        merged.role = SemanticRole.IMPLICIT;
        SemanticHeuristics.run('combine_juxtaposition', merged);
        ops.unshift(merged);
        continue;
      }

      // Case 4: one neighbour is itself an operator; leave all three alone.
      if (SemanticPred.isOperator(left) || SemanticPred.isOperator(right)) {
        trace('Juxta Heuristic Case 4');
        result = result.concat([left, op, right]).concat(first);
        continue;
      }

      const leftImplicit = SemanticPred.isImplicitOp(left);
      const rightImplicit = SemanticPred.isImplicitOp(right);
      let combined = null;
      if (leftImplicit && rightImplicit) {
        // Case 5: merge `right` into `left`.
        trace('Juxta Heuristic Case 5');
        left.contentNodes.push(op);
        left.contentNodes = left.contentNodes.concat(right.contentNodes);
        // NOTE(review): `right` itself is appended here and its children are
        // appended again on the next line, so `left` ends up with one more
        // child than "content nodes + 1". Looks unintended -- confirm before
        // changing; behaviour is kept as is.
        left.childNodes.push(right);
        left.childNodes = left.childNodes.concat(right.childNodes);
        right.childNodes.forEach((x) => (x.parent = left));
        op.parent = left;
        left.addMathmlNodes(op.mathml);
        left.addMathmlNodes(right.mathml);
        combined = left;
      } else if (leftImplicit) {
        // Case 6: append `right` to the implicit operation on the left.
        trace('Juxta Heuristic Case 6');
        left.contentNodes.push(op);
        left.childNodes.push(right);
        right.parent = left;
        op.parent = left;
        left.addMathmlNodes(op.mathml);
        left.addMathmlNodes(right.mathml);
        combined = left;
      } else if (rightImplicit) {
        // Case 7: prepend `left` to the implicit operation on the right.
        trace('Juxta Heuristic Case 7');
        right.contentNodes.unshift(op);
        right.childNodes.unshift(left);
        left.parent = right;
        op.parent = right;
        right.addMathmlNodes(op.mathml);
        right.addMathmlNodes(left.mathml);
        combined = right;
      } else {
        // Case 8: two plain operands; build a fresh implicit operation.
        trace('Juxta Heuristic Case 8');
        combined = SemanticHeuristics.factory.makeBranchNode(
          SemanticType.INFIXOP,
          [left, right],
          [op],
          op.textContent
        );
        combined.role = SemanticRole.IMPLICIT;
      }
      result.push(combined);
      result = result.concat(first);
    }
    return result;
  }
  // Multi heuristic 'intvar_from_implicit': applies implicitUnpack when the
  // first node of the list exists and satisfies SemanticPred.isImplicit.
  SemanticHeuristics.add(
    new SemanticMultiHeuristic(
      'intvar_from_implicit',
      implicitUnpack,
      (nodes) => nodes[0] && SemanticPred.isImplicit(nodes[0])
    )
  );
  /**
   * Replaces the first node of the list by its children, in place.
   *
   * @param {Array} nodes Node list; mutated. Nothing is returned.
   */
  function implicitUnpack(nodes) {
    const [head] = nodes;
    nodes.splice(0, 1, ...head.childNodes);
  }
  // Tree heuristic 'intvar_from_fraction': applies integralFractionArg to an
  // INTEGRAL node whose third child is EMPTY and whose second child is a
  // FRACTION.
  SemanticHeuristics.add(
    new SemanticTreeHeuristic(
      'intvar_from_fraction',
      integralFractionArg,
      (node) => {
        if (node.type !== SemanticType.INTEGRAL) {
          return false;
        }
        const integrand = node.childNodes[1];
        const intvar = node.childNodes[2];
        return (
          intvar.type === SemanticType.EMPTY &&
          integrand.type === SemanticType.FRACTION
        );
      }
    )
  );
  /**
   * Looks for the integration variable in the enumerator of a fraction that
   * is the integrand of an integral node.
   *
   * - If the enumerator (or, for an implicit enumerator, its last child)
   *   satisfies `SemanticPred.isIntegralDxBoundarySingle`, it gets the
   *   INTEGRAL role.
   * - If the last two children of an implicit enumerator satisfy
   *   `SemanticPred.isIntegralDxBoundary`, they are combined into a prefix
   *   node with INTEGRAL role that replaces them.
   *
   * The replacing prefix node always gets its `parent` set to the node it is
   * inserted into (previously it was left unset when it replaced the whole
   * enumerator), and the integral node is returned on every path, like the
   * other tree heuristics in this file.
   *
   * @param {object} node The integral node; its second child is the integrand.
   * @returns {object} The same integral node.
   */
  function integralFractionArg(node) {
    const integrand = node.childNodes[1];
    const enumerator = integrand.childNodes[0];
    if (SemanticPred.isIntegralDxBoundarySingle(enumerator)) {
      enumerator.role = SemanticRole.INTEGRAL;
      return node;
    }
    if (!SemanticPred.isImplicit(enumerator)) {
      return node;
    }
    const length = enumerator.childNodes.length;
    const first = enumerator.childNodes[length - 2];
    const second = enumerator.childNodes[length - 1];
    if (SemanticPred.isIntegralDxBoundarySingle(second)) {
      second.role = SemanticRole.INTEGRAL;
      return node;
    }
    if (!SemanticPred.isIntegralDxBoundary(first, second)) {
      return node;
    }
    const prefix = SemanticProcessor.getInstance()['prefixNode_'](second, [
      first
    ]);
    prefix.role = SemanticRole.INTEGRAL;
    if (length === 2) {
      // The implicit enumerator consisted of exactly these two nodes, so the
      // prefix node takes its place as the fraction's first child.
      integrand.childNodes[0] = prefix;
      prefix.parent = integrand;
    } else {
      // Drop the last child and its operator, then put the prefix node where
      // the second-to-last child was.
      enumerator.childNodes.pop();
      enumerator.contentNodes.pop();
      enumerator.childNodes[length - 2] = prefix;
      prefix.parent = enumerator;
    }
    return node;
  }
  // Tree heuristic 'rewrite_subcases'.
  //
  // The applicability predicate inspects the first and the last cell of the
  // table's first row. As a side effect it annotates the table with
  // 'Emph'/'left' and/or 'Emph'/'right':
  //  - left:  the first cell's MathML passes eligibleNode and the first cell
  //           of every later row has no children;
  //  - right: the last cell's MathML passes eligibleNode and every later row
  //           has fewer cells than the first row.
  // The heuristic (rewriteSubcasesTable) applies if either side qualifies.
  SemanticHeuristics.add(
    new SemanticTreeHeuristic('rewrite_subcases', rewriteSubcasesTable, (table) => {
      const headCells = table.childNodes[0].childNodes;

      let left = Boolean(eligibleNode(headCells[0].mathmlTree));
      if (left) {
        for (const row of table.childNodes.slice(1)) {
          if (!row) {
            break;
          }
          if (row.childNodes[0].childNodes.length) {
            left = false;
            break;
          }
        }
      }
      if (left) {
        table.addAnnotation('Emph', 'left');
      }

      const lastHeadCell =
        table.childNodes[0].childNodes[table.childNodes[0].childNodes.length - 1];
      let right = Boolean(eligibleNode(lastHeadCell.mathmlTree));
      if (right) {
        const columns = table.childNodes[0].childNodes.length;
        for (const row of table.childNodes.slice(1)) {
          if (!row) {
            break;
          }
          if (row.childNodes.length >= columns) {
            right = false;
            break;
          }
        }
      }
      if (right) {
        table.addAnnotation('Emph', 'right');
      }

      return left || right;
    })
  );
  /**
   * Checks the MathML shape expected of an emphasised table cell: the node's
   * first child is an `mpadded` element whose first child is again `mpadded`
   * and whose last child is `mphantom`.
   *
   * @param {object} node DOM node to inspect.
   * @returns {*} Truthy if the shape matches. Like the `&&` chain it is built
   *     from, the result may be a falsy non-boolean (e.g. `undefined`) when a
   *     child is missing.
   */
  function eligibleNode(node) {
    const wrapper = node.childNodes[0];
    return (
      wrapper &&
      wrapper.childNodes[0] &&
      DomUtil.tagName(wrapper) === MMLTAGS.MPADDED &&
      DomUtil.tagName(wrapper.childNodes[0]) === MMLTAGS.MPADDED &&
      DomUtil.tagName(wrapper.childNodes[wrapper.childNodes.length - 1]) ===
        MMLTAGS.MPHANTOM
    );
  }
  // Semantic types that rewriteCell expands into a flat list of content and
  // child nodes via SemanticSkeleton.combineContentChildren.
  const rewritable = [
    SemanticType.PUNCTUATED,
    SemanticType.RELSEQ,
    SemanticType.MULTIREL,
    SemanticType.INFIXOP,
    SemanticType.PREFIXOP,
    SemanticType.POSTFIXOP,
  ];
  /**
   * Rewrites a table annotated by the 'rewrite_subcases' predicate into a row
   * consisting of the rewritten left cell(s), the table itself and the
   * rewritten right cell(s).
   *
   * The emphasised cells are removed from the table (the first cell of every
   * row on the left, the last cell of the first row on the right), the table
   * is passed to `SemanticProcessor.tableToMultiline`, and the table's 'Emph'
   * annotations are moved to the new row node while the table keeps
   * `['table']`.
   *
   * @param {object} table The table node; mutated.
   * @returns {object} The new row node built by the semantic processor.
   */
  function rewriteSubcasesTable(table) {
    table.addAnnotation('Emph', 'top');
    let row = [];

    if (table.hasAnnotation('Emph', 'left')) {
      const topLeft = table.childNodes[0].childNodes[0].childNodes[0];
      const leftCells = rewriteCell(topLeft, true);
      for (const cell of leftCells) {
        cell.addAnnotation('Emph', 'left');
      }
      row = row.concat(leftCells);
      // Drop the (now rewritten or empty) first cell of every row.
      for (const line of table.childNodes) {
        if (!line) {
          break;
        }
        line.childNodes.shift();
      }
    }

    row.push(table);

    if (table.hasAnnotation('Emph', 'right')) {
      const headCells = table.childNodes[0].childNodes;
      const topRight = headCells[headCells.length - 1].childNodes[0];
      const rightCells = rewriteCell(topRight);
      // NOTE(review): the right-hand cells are annotated 'left', exactly as
      // the left-hand ones above. Possibly a copy/paste slip for 'right' --
      // confirm against consumers of the 'Emph' annotation before changing.
      for (const cell of rightCells) {
        cell.addAnnotation('Emph', 'left');
      }
      row = row.concat(rightCells);
      table.childNodes[0].childNodes.pop();
    }

    SemanticProcessor.tableToMultiline(table);
    const newNode = SemanticProcessor.getInstance().row(row);
    // Move the table's emphasis annotations to the enclosing row.
    const annotation = table.annotation['Emph'];
    table.annotation['Emph'] = ['table'];
    annotation.forEach((x) => newNode.addAnnotation('Emph', x));
    return newNode;
  }
  /**
   * Rewrites the content of an emphasised table cell into a list of nodes,
   * turning the punctuation next to the table into a fence where possible.
   *
   * "Outer" below means the child adjacent to the table: the last child for a
   * left cell, the first child for a right cell.
   *
   * @param {object} cell The semantic node inside the cell.
   * @param {boolean=} left True for the cell left of the table; falsy for the
   *     cell on the right.
   * @returns {Array} The node(s) that replace the cell in the new row.
   */
  function rewriteCell(cell, left) {
    if (cell.childNodes.length === 0) {
      rewriteFence(cell);
      return [cell];
    }

    const outerOf = (list) => list[left ? list.length - 1 : 0];
    const innerOf = (list) => list[left ? 0 : list.length - 1];

    // A punctuated cell whose punctuation sits on the table side: split the
    // fence off and continue with the node at the opposite end.
    let fence = null;
    const punctRole = left ? SemanticRole.ENDPUNCT : SemanticRole.STARTPUNCT;
    if (cell.type === SemanticType.PUNCTUATED && cell.role === punctRole) {
      const parts = cell.childNodes;
      if (rewriteFence(outerOf(parts))) {
        cell = innerOf(parts);
        fence = outerOf(parts);
      }
    }

    if (rewritable.includes(cell.type)) {
      const parts = cell.childNodes;
      rewriteFence(outerOf(parts));
      const newNodes = SemanticSkeleton.combineContentChildren(
        cell.type,
        cell.role,
        cell.contentNodes,
        cell.childNodes
      );
      if (fence) {
        if (left) {
          newNodes.push(fence);
        } else {
          newNodes.unshift(fence);
        }
      }
      return newNodes;
    }

    if (!fence) {
      return [cell];
    }
    return left ? [cell, fence] : [fence, cell];
  }
  // Maps the role of a punctuation node to the fence role it receives when
  // rewriteFence converts it.
  const PUNCT_TO_FENCE_ = {
    [SemanticRole.METRIC]: SemanticRole.METRIC,
    [SemanticRole.VBAR]: SemanticRole.NEUTRAL,
    [SemanticRole.OPENFENCE]: SemanticRole.OPEN,
    [SemanticRole.CLOSEFENCE]: SemanticRole.CLOSE,
  };

  /**
   * Converts a punctuation node into a fence node if its role has an entry in
   * PUNCT_TO_FENCE_. On success the node's role and type are updated and it
   * is annotated with 'Emph'/'fence'.
   *
   * @param {object} fence Candidate node; mutated on success.
   * @returns {boolean} True if the node was converted.
   */
  function rewriteFence(fence) {
    const fenceRole =
      fence.type === SemanticType.PUNCTUATION && PUNCT_TO_FENCE_[fence.role];
    if (!fenceRole) {
      return false;
    }
    fence.role = fenceRole;
    fence.type = SemanticType.FENCE;
    fence.addAnnotation('Emph', 'fence');
    return true;
  }
  // Multi heuristic 'ellipses': walks through the node list and merges runs
  // of FULLSTOP nodes (new role ELLIPSIS) and runs of DASH nodes (role stays
  // DASH) via combineNodes. The input list is consumed. Only applicable to
  // lists with more than one node.
  SemanticHeuristics.add(
    new SemanticMultiHeuristic(
      'ellipses',
      (nodes) => {
        const merged = [];
        let remaining = nodes;
        for (
          let current = remaining.shift();
          current;
          current = remaining.shift()
        ) {
          [current, remaining] = combineNodes(
            current,
            remaining,
            SemanticRole.FULLSTOP,
            SemanticRole.ELLIPSIS
          );
          [current, remaining] = combineNodes(
            current,
            remaining,
            SemanticRole.DASH
          );
          merged.push(current);
        }
        return merged;
      },
      (nodes) => nodes.length > 1
    )
  );
  /**
   * Collects the run of consecutive nodes with role `src` that starts at
   * `current` and merges it into one node.
   *
   * @param {object} current First node to look at.
   * @param {Array} nodes Nodes following `current`; consumed from the front.
   * @param {*} src Role a node must have to be part of the run.
   * @param {*} target Role of the merged node (defaults to `src`).
   * @returns {Array} A pair `[node, nodes]`: `current` itself if it does not
   *     have role `src`; the single matching node for a run of one; otherwise
   *     the node built by `combinedNodes`. `nodes` is the remaining list.
   */
  function combineNodes(current, nodes, src, target = src) {
    const run = [];
    let cursor = current;
    for (; cursor && cursor.role === src; cursor = nodes.shift()) {
      run.push(cursor);
    }
    if (run.length === 0) {
      return [cursor, nodes];
    }
    // The node that ended the run was taken off the list; put it back.
    if (cursor) {
      nodes.unshift(cursor);
    }
    const merged = run.length === 1 ? run[0] : combinedNodes(run, target);
    return [merged, nodes];
  }
  /**
   * Builds a PUNCTUATION branch node with the given nodes as children and no
   * content nodes, and assigns it the given role.
   *
   * @param {Array} nodes Children of the new node.
   * @param {*} role Role of the new node.
   * @returns {object} The new branch node.
   */
  function combinedNodes(nodes, role) {
    const { factory } = SemanticHeuristics;
    const branch = factory.makeBranchNode(SemanticType.PUNCTUATION, nodes, []);
    branch.role = role;
    return branch;
  }
  // Multi heuristic 'op_with_limits': retypes the first node of the list as a
  // LARGEOP with SUM role.
  //
  // Applicable when the first node is an OPERATOR and at least one of the
  // remaining nodes is a RELSEQ, a MULTIREL, an INFIXOP with ELEMENT role or a
  // PUNCTUATED node with SEQUENCE role.
  SemanticHeuristics.add(
    new SemanticMultiHeuristic(
      'op_with_limits',
      (nodes) => {
        const [center] = [nodes[0]];
        center.type = SemanticType.LARGEOP;
        center.role = SemanticRole.SUM;
        return nodes;
      },
      (nodes) => {
        if (nodes[0].type !== SemanticType.OPERATOR) {
          return false;
        }
        const looksLikeLimit = (node) =>
          node.type === SemanticType.RELSEQ ||
          node.type === SemanticType.MULTIREL ||
          (node.type === SemanticType.INFIXOP &&
            node.role === SemanticRole.ELEMENT) ||
          (node.type === SemanticType.PUNCTUATED &&
            node.role === SemanticRole.SEQUENCE);
        return nodes.slice(1).some(looksLikeLimit);
      }
    )
  );
  // Multi heuristic 'bracketed_interval'.
  //
  // Expects the list to start with the two fences, followed by the content.
  // Applicable when both fences are square brackets (either orientation) and
  // the content splits at exactly one punctuation node into two non-empty
  // parts. The content is then turned into a row and wrapped into a FENCED
  // node with LEFTRIGHT role.
  SemanticHeuristics.add(
    new SemanticMultiHeuristic(
      'bracketed_interval',
      (nodes) => {
        const [leftFence, rightFence] = [nodes[0], nodes[1]];
        const inner = SemanticProcessor.getInstance().row(nodes.slice(2));
        const fenced = SemanticHeuristics.factory.makeBranchNode(
          SemanticType.FENCED,
          [inner],
          [leftFence, rightFence]
        );
        fenced.role = SemanticRole.LEFTRIGHT;
        return fenced;
      },
      (nodes) => {
        const isSquareBracket = (fence) =>
          fence.textContent === ']' || fence.textContent === '[';
        const leftFence = nodes[0];
        const rightFence = nodes[1];
        if (
          !leftFence ||
          !isSquareBracket(leftFence) ||
          !rightFence ||
          !isSquareBracket(rightFence)
        ) {
          return false;
        }
        const partition = SemanticUtil.partitionNodes(
          nodes.slice(2),
          SemanticPred.isPunctuation
        );
        return Boolean(
          partition.rel.length === 1 &&
            partition.comp[0].length &&
            partition.comp[1].length
        );
      }
    )
  );
  // MathML heuristic 'function_from_identifiers'.
  //
  // Applicable to an element with at least two children, all of which are
  // `mi` elements whose trimmed text is known to SemanticMap.Meaning with the
  // LATINLETTER role. The trimmed texts are concatenated; if the resulting
  // string has a known (non-UNKNOWN) type, a single leaf node is created for
  // it, using the font derived from the element's `mathvariant` attribute.
  // Otherwise the MathML node is returned unchanged.
  SemanticHeuristics.add(
    new SemanticMmlHeuristic(
      'function_from_identifiers',
      (node) => {
        let expr = '';
        for (const child of DomUtil.toArray(node.childNodes)) {
          expr += child.textContent.trim();
        }
        if (SemanticMap.Meaning.get(expr).type === SemanticType.UNKNOWN) {
          return node;
        }
        const font = SemanticProcessor.getInstance().font(
          node.getAttribute('mathvariant')
        );
        const leaf = SemanticHeuristics.factory.makeLeafNode(expr, font);
        leaf.mathmlTree = node;
        return leaf;
      },
      (node) => {
        const children = DomUtil.toArray(node.childNodes);
        const isLatinIdentifier = (child) =>
          DomUtil.tagName(child) === MMLTAGS.MI &&
          SemanticMap.Meaning.get(child.textContent.trim()).role ===
            SemanticRole.LATINLETTER;
        return children.length >= 2 && children.every(isLatinIdentifier);
      }
    )
  );