document-to-html.js 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462
  1. var _ = require("underscore");
  2. var promises = require("./promises");
  3. var documents = require("./documents");
  4. var htmlPaths = require("./styles/html-paths");
  5. var results = require("./results");
  6. var images = require("./images");
  7. var Html = require("./html");
  8. var writers = require("./writers");
  9. exports.DocumentConverter = DocumentConverter;
  // Builds a converter object exposing convertToHtml(element).
  // Each call indexes the element's comments by commentId (only when the
  // element is a document; otherwise no comments are used) and runs a new
  // DocumentConversion over the element.
  function DocumentConverter(options) {
      function convertToHtml(element) {
          var commentList = [];
          if (element.type === documents.types.document) {
              commentList = element.comments;
          }
          var commentsById = _.indexBy(commentList, "commentId");
          return new DocumentConversion(options, commentsById).convertToHtml(element);
      }

      return {convertToHtml: convertToHtml};
  }
  22. function DocumentConversion(options, comments) {
  // Per-conversion mutable state, filled in while elements are converted.
  var noteNumber = 1;
  var noteReferences = [];
  var referencedComments = [];

  // Caller options layered over the defaults (a local copy; the caller's
  // object is not modified).
  options = _.extend({ignoreEmptyParagraphs: true}, options);

  // Prefix applied to every generated HTML id; empty unless one was supplied.
  var idPrefix = "";
  if (options.idPrefix !== undefined) {
      idPrefix = options.idPrefix;
  }
  var ignoreEmptyParagraphs = options.ignoreEmptyParagraphs;
  // Path used for paragraphs that no style mapping matches.
  var defaultParagraphStyle = htmlPaths.topLevelElement("p");
  var styleMap = options.styleMap || [];
  // Converts a document to a promise of a results.Result holding the HTML
  // string and the collected messages.
  // Nodes of type "deferred" are resolved one after another via
  // promises.mapSeries, then substituted into the tree before writing.
  function convertToHtml(document) {
      var messages = [];
      var html = elementToHtml(document, messages, {});
      var resolvedById = {};

      // Gathers every deferred node in the order walkHtml visits them.
      function collectDeferred() {
          var found = [];
          walkHtml(html, function(node) {
              if (node.type === "deferred") {
                  found.push(node);
              }
          });
          return found;
      }

      // Evaluates one deferred node and remembers its value under the node id.
      function resolveDeferred(deferred) {
          return deferred.value().then(function(value) {
              resolvedById[deferred.id] = value;
          });
      }

      // Maps a node to its replacement nodes: a deferred node becomes its
      // resolved value, a parent is copied with its children substituted.
      function substitute(node) {
          if (node.type === "deferred") {
              return resolvedById[node.id];
          }
          if (!node.children) {
              return [node];
          }
          var copy = _.extend({}, node, {
              children: flatMap(node.children, substitute)
          });
          return [copy];
      }

      // Writes the fully-resolved tree and wraps the output in a Result.
      function render() {
          var writer = writers.writer({
              prettyPrint: options.prettyPrint,
              outputFormat: options.outputFormat
          });
          Html.write(writer, Html.simplify(flatMap(html, substitute)));
          return new results.Result(writer.asString(), messages);
      }

      return promises.mapSeries(collectDeferred(), resolveDeferred).then(render);
  }
  // Converts each element with elementToHtml and concatenates the resulting
  // node lists into one flat list.
  function convertElements(elements, messages, options) {
      var convertOne = function(element) {
          return elementToHtml(element, messages, options);
      };
      return flatMap(elements, convertOne);
  }
  // Dispatches an element to the converter registered for its type in
  // elementConverters. Element types without a converter produce no nodes.
  // Throws if options is falsy.
  function elementToHtml(element, messages, options) {
      if (!options) {
          throw new Error("options not set");
      }
      var handler = elementConverters[element.type];
      return handler ? handler(element, messages, options) : [];
  }
  // Converts a paragraph by wrapping its converted children in the HTML path
  // chosen by htmlPathForParagraph. When ignoreEmptyParagraphs is off, the
  // content is prefixed with Html.forceWrite.
  function convertParagraph(element, messages, options) {
      var path = htmlPathForParagraph(element, messages);
      return path.wrap(function() {
          var content = convertElements(element.children, messages, options);
          return ignoreEmptyParagraphs ? content : [Html.forceWrite].concat(content);
      });
  }
  // Returns the HTML path for a paragraph: the target of the first matching
  // style mapping, otherwise defaultParagraphStyle. A warning is recorded when
  // the paragraph has a styleId but no mapping matched.
  function htmlPathForParagraph(element, messages) {
      var style = findStyle(element);
      if (style) {
          return style.to;
      }
      if (element.styleId) {
          messages.push(unrecognisedStyleWarning("paragraph", element));
      }
      return defaultParagraphStyle;
  }
  // Converts a run by nesting its converted children inside one HTML path per
  // formatting property, then inside the path of the run's mapped style.
  // Paths are collected innermost-first: highlight, small caps, all caps,
  // strikethrough, underline, subscript, superscript, italic, bold, style.
  // A warning is recorded when the run has a styleId but no mapping matched.
  function convertRun(run, messages, options) {
      var alignments = documents.verticalAlignment;
      var paths = [];

      if (run.highlight !== null) {
          var highlightPath = findHtmlPath({type: "highlight", color: run.highlight});
          if (highlightPath) {
              paths.push(highlightPath);
          }
      }

      // [applies, pathFactory] pairs; a factory only runs when applies is truthy.
      var formatting = [
          [run.isSmallCaps, function() { return findHtmlPathForRunProperty("smallCaps"); }],
          [run.isAllCaps, function() { return findHtmlPathForRunProperty("allCaps"); }],
          [run.isStrikethrough, function() { return findHtmlPathForRunProperty("strikethrough", "s"); }],
          [run.isUnderline, function() { return findHtmlPathForRunProperty("underline"); }],
          [
              run.verticalAlignment === alignments.subscript,
              function() { return htmlPaths.element("sub", {}, {fresh: false}); }
          ],
          [
              run.verticalAlignment === alignments.superscript,
              function() { return htmlPaths.element("sup", {}, {fresh: false}); }
          ],
          [run.isItalic, function() { return findHtmlPathForRunProperty("italic", "em"); }],
          [run.isBold, function() { return findHtmlPathForRunProperty("bold", "strong"); }]
      ];
      formatting.forEach(function(entry) {
          if (entry[0]) {
              paths.push(entry[1]());
          }
      });

      var style = findStyle(run);
      if (style) {
          paths.push(style.to);
      } else {
          if (run.styleId) {
              messages.push(unrecognisedStyleWarning("run", run));
          }
          paths.push(htmlPaths.empty);
      }

      // Each path wraps the thunk built so far, so later paths end up outermost.
      var render = paths.reduce(function(inner, path) {
          return function() {
              return path.wrap(inner);
          };
      }, function() {
          return convertElements(run.children, messages, options);
      });
      return render();
  }
  // Returns the HTML path for a run property such as "bold": the mapped path
  // if a style mapping matches {type: elementType}, otherwise a non-fresh
  // element for defaultTagName, otherwise htmlPaths.empty.
  function findHtmlPathForRunProperty(elementType, defaultTagName) {
      var mapped = findHtmlPath({type: elementType});
      if (mapped) {
          return mapped;
      }
      if (defaultTagName) {
          return htmlPaths.element(defaultTagName, {}, {fresh: false});
      }
      return htmlPaths.empty;
  }
  // Returns the target path of the first style mapping matching the element,
  // or defaultPath when no mapping matches.
  function findHtmlPath(element, defaultPath) {
      var style = findStyle(element);
      if (style) {
          return style.to;
      }
      return defaultPath;
  }
  // Returns the first entry of styleMap whose `from` matcher accepts the
  // element, or undefined when none does.
  function findStyle(element) {
      var index = 0;
      while (index < styleMap.length) {
          var candidate = styleMap[index];
          if (candidate.from.matches(element)) {
              return candidate;
          }
          index += 1;
      }
      return undefined;
  }
  // Wraps an image converter so that a failure does not abort the conversion:
  // the error is recorded in messages as results.error and the image yields
  // no nodes.
  function recoveringConvertImage(convertImage) {
      return function(image, messages) {
          function attemptConversion() {
              return convertImage(image, messages);
          }

          function recordFailure(error) {
              messages.push(results.error(error));
              return [];
          }

          return promises.attempt(attemptConversion).caught(recordFailure);
      };
  }
  // HTML id of the note itself (the target that a note reference links to).
  function noteHtmlId(note) {
      var type = note.noteType;
      var id = note.noteId;
      return referentHtmlId(type, id);
  }
  // HTML id of the in-text reference to a note (the target of the note's
  // back-link).
  function noteRefHtmlId(note) {
      var type = note.noteType;
      var id = note.noteId;
      return referenceHtmlId(type, id);
  }
  // Builds the prefixed HTML id "<type>-<id>" for the thing being referred to.
  function referentHtmlId(referenceType, referenceId) {
      var suffix = referenceType + "-" + referenceId;
      return htmlId(suffix);
  }
  // Builds the prefixed HTML id "<type>-ref-<id>" for a reference marker.
  function referenceHtmlId(referenceType, referenceId) {
      var suffix = referenceType + "-ref-" + referenceId;
      return htmlId(suffix);
  }
  // Prepends the configured idPrefix to an id suffix.
  function htmlId(suffix) {
      var prefixed = idPrefix + suffix;
      return prefixed;
  }
  // Path passed to findHtmlPath as the fallback for tables: a single "table"
  // element created with fresh: true.
  var defaultTablePath = htmlPaths.elements(
      [htmlPaths.element("table", {}, {fresh: true})]
  );
  // Converts a table by wrapping its converted rows in the mapped HTML path,
  // falling back to defaultTablePath when no style mapping matches.
  function convertTable(element, messages, options) {
      var path = findHtmlPath(element, defaultTablePath);
      var renderChildren = function() {
          return convertTableChildren(element, messages, options);
      };
      return path.wrap(renderChildren);
  }
  // Converts the children of a table, splitting leading header rows from the
  // body.
  //
  // The header is the longest leading run of children that are table rows
  // with isHeader set; everything from the first other child onwards is body.
  // - No leading header rows: children are converted as-is with
  //   isTableHeader: false (no thead/tbody wrappers).
  // - Otherwise: header rows go in a fresh <thead> (isTableHeader: true) and
  //   the rest in a fresh <tbody> (isTableHeader: false).
  // The result always starts with Html.forceWrite.
  function convertTableChildren(element, messages, options) {
      // Index of the first child that is NOT a header row.
      // Note: `!child.type === x` parses as `(!child.type) === x`, which is
      // always false for a string type, so the comparison must be `!==`.
      var bodyIndex = _.findIndex(element.children, function(child) {
          return child.type !== documents.types.tableRow || !child.isHeader;
      });
      if (bodyIndex === -1) {
          // Every child is a header row.
          bodyIndex = element.children.length;
      }

      var children;
      if (bodyIndex === 0) {
          children = convertElements(
              element.children,
              messages,
              _.extend({}, options, {isTableHeader: false})
          );
      } else {
          var headRows = convertElements(
              element.children.slice(0, bodyIndex),
              messages,
              _.extend({}, options, {isTableHeader: true})
          );
          var bodyRows = convertElements(
              element.children.slice(bodyIndex),
              messages,
              _.extend({}, options, {isTableHeader: false})
          );
          children = [
              Html.freshElement("thead", {}, headRows),
              Html.freshElement("tbody", {}, bodyRows)
          ];
      }
      return [Html.forceWrite].concat(children);
  }
  // Converts a table row into a fresh <tr> whose content is Html.forceWrite
  // followed by the converted cells.
  function convertTableRow(element, messages, options) {
      var cells = convertElements(element.children, messages, options);
      var content = [Html.forceWrite].concat(cells);
      var row = Html.freshElement("tr", {}, content);
      return [row];
  }
  // Converts a table cell into a fresh <th> (when options.isTableHeader is
  // truthy) or <td>. colspan/rowspan attributes are only emitted for spans
  // other than 1.
  function convertTableCell(element, messages, options) {
      var tagName = options.isTableHeader ? "th" : "td";
      var children = convertElements(element.children, messages, options);

      var attributes = {};
      ["colSpan", "rowSpan"].forEach(function(property) {
          var span = element[property];
          if (span !== 1) {
              // Attribute names are the lower-cased property names.
              attributes[property.toLowerCase()] = span.toString();
          }
      });

      var content = [Html.forceWrite].concat(children);
      return [Html.freshElement(tagName, attributes, content)];
  }
  // Converts a comment reference into a link to the comment, labelled
  // "[<author initials><n>]" where n counts referenced comments so far.
  // The comment is also queued in referencedComments.
  // The default path is htmlPaths.ignore; the link is built inside wrap().
  function convertCommentReference(reference, messages, options) {
      var path = findHtmlPath(reference, htmlPaths.ignore);

      function buildLink() {
          var comment = comments[reference.commentId];
          var position = referencedComments.length + 1;
          var label = "[" + commentAuthorLabel(comment) + position + "]";
          referencedComments.push({label: label, comment: comment});
          // TODO: remove duplication with note references
          var attributes = {
              href: "#" + referentHtmlId("comment", reference.commentId),
              id: referenceHtmlId("comment", reference.commentId)
          };
          return [Html.freshElement("a", attributes, [Html.text(label)])];
      }

      return path.wrap(buildLink);
  }
  // Converts a referenced comment into a <dt> carrying the comment's id and
  // label, followed by a <dd> holding the converted comment body and a
  // back-link to the reference.
  function convertComment(referencedComment, messages, options) {
      // TODO: remove duplication with note references
      var comment = referencedComment.comment;
      var label = referencedComment.label;

      var bodyNodes = convertElements(comment.body, messages, options);
      var backLink = Html.freshElement(
          "a",
          {"href": "#" + referenceHtmlId("comment", comment.commentId)},
          [Html.text("↑")]
      );
      var backLinkParagraph = Html.nonFreshElement("p", {}, [Html.text(" "), backLink]);
      var body = bodyNodes.concat([backLinkParagraph]);

      var term = Html.freshElement(
          "dt",
          {"id": referentHtmlId("comment", comment.commentId)},
          [Html.text("Comment " + label)]
      );
      var description = Html.freshElement("dd", {}, body);
      return [term, description];
  }
  // Converts a break into the HTML path chosen by htmlPathForBreak, wrapped
  // around no children.
  function convertBreak(element, messages, options) {
      var path = htmlPathForBreak(element);
      var noChildren = function() {
          return [];
      };
      return path.wrap(noChildren);
  }
  // Returns the HTML path for a break: the mapped style's target if one
  // matches, a top-level <br> for line breaks, and htmlPaths.empty for any
  // other break type.
  function htmlPathForBreak(element) {
      var style = findStyle(element);
      if (style) {
          return style.to;
      }
      return element.breakType === "line" ?
          htmlPaths.topLevelElement("br") :
          htmlPaths.empty;
  }
  // Converts the document root: its children first (which collects note and
  // comment references), then an <ol> of the referenced notes and a <dl> of
  // the referenced comments.
  function convertDocument(document, messages, options) {
      var children = convertElements(document.children, messages, options);
      var notes = noteReferences.map(function(noteReference) {
          return document.notes.resolve(noteReference);
      });
      var notesNodes = convertElements(notes, messages, options);
      var notesList = Html.freshElement("ol", {}, notesNodes);
      var commentNodes = flatMap(referencedComments, function(referencedComment) {
          return convertComment(referencedComment, messages, options);
      });
      var commentsList = Html.freshElement("dl", {}, commentNodes);
      return children.concat([notesList, commentsList]);
  }

  // Converts a text element into a single text node.
  function convertText(element, messages, options) {
      return [Html.text(element.value)];
  }

  // Converts a tab into a text node containing a tab character.
  function convertTab(element, messages, options) {
      return [Html.text("\t")];
  }

  // Converts a hyperlink into a non-fresh <a>. Anchors link to the prefixed
  // in-document id; otherwise the element's href is used as-is.
  function convertHyperlink(element, messages, options) {
      var attributes = {
          href: element.anchor ? "#" + htmlId(element.anchor) : element.href
      };
      if (element.targetFrame != null) {
          attributes.target = element.targetFrame;
      }
      var children = convertElements(element.children, messages, options);
      return [Html.nonFreshElement("a", attributes, children)];
  }

  // Converts a bookmark start into a fresh <a> carrying the prefixed bookmark
  // name as its id, with Html.forceWrite as its only child.
  function convertBookmarkStart(element, messages, options) {
      var attributes = {id: htmlId(element.name)};
      return [Html.freshElement("a", attributes, [Html.forceWrite])];
  }

  // Converts a note reference into a superscript link labelled "[n]", and
  // queues the reference so the note is rendered at the end of the document.
  function convertNoteReference(element, messages, options) {
      noteReferences.push(element);
      var attributes = {
          href: "#" + noteHtmlId(element),
          id: noteRefHtmlId(element)
      };
      var label = "[" + (noteNumber++) + "]";
      var anchor = Html.freshElement("a", attributes, [Html.text(label)]);
      return [Html.freshElement("sup", {}, [anchor])];
  }

  // Converts a note into an <li> holding the note body and a back-link to
  // the reference. Returns the element itself rather than a list.
  function convertNote(element, messages, options) {
      var children = convertElements(element.body, messages, options);
      var backLinkPath = htmlPaths.element("p", {}, {fresh: false});
      var backLink = Html.elementWithTag(backLinkPath, [
          Html.text(" "),
          Html.freshElement("a", {href: "#" + noteRefHtmlId(element)}, [Html.text("↑")])
      ]);
      var body = children.concat([backLink]);
      return Html.freshElement("li", {id: noteHtmlId(element)}, body);
  }

  // Dispatch table used by elementToHtml: element type -> converter taking
  // (element, messages, options).
  var elementConverters = {
      "document": convertDocument,
      "paragraph": convertParagraph,
      "run": convertRun,
      "text": convertText,
      "tab": convertTab,
      "hyperlink": convertHyperlink,
      "bookmarkStart": convertBookmarkStart,
      "noteReference": convertNoteReference,
      "note": convertNote,
      "commentReference": convertCommentReference,
      "comment": convertComment,
      // Images are converted lazily; options.convertImage overrides the
      // images.dataUri default.
      "image": deferredConversion(recoveringConvertImage(options.convertImage || images.dataUri)),
      "table": convertTable,
      "tableRow": convertTableRow,
      "tableCell": convertTableCell,
      "break": convertBreak
  };
  373. return {
  374. convertToHtml: convertToHtml
  375. };
  376. }
  // Counter handing out a distinct id to every deferred node created in this
  // module.
  var deferredId = 1;

  // Wraps a converter so that calling it does no work up front: it returns a
  // single "deferred" placeholder node whose value() runs the wrapped
  // converter with the original arguments.
  function deferredConversion(func) {
      return function(element, messages, options) {
          var placeholder = {
              type: "deferred",
              id: deferredId++,
              value: function() {
                  return func(element, messages, options);
              }
          };
          return [placeholder];
      };
  }
  // Builds a results.warning reporting that an element of the given kind
  // ("paragraph", "run", ...) uses a style with no mapping, naming both the
  // style name and the style id.
  function unrecognisedStyleWarning(type, element) {
      var description = "Unrecognised " + type + " style: '" + element.styleName + "'";
      var styleIdNote = " (Style ID: " + element.styleId + ")";
      return results.warning(description + styleIdNote);
  }
  // Maps each value through func, then flattens the results by one level.
  function flatMap(values, func) {
      var mapped = values.map(func);
      return _.flatten(mapped, true);
  }
  // Calls callback on every node of the tree, visiting each node before its
  // children and siblings in list order.
  function walkHtml(nodes, callback) {
      function visit(node) {
          callback(node);
          if (node.children) {
              node.children.forEach(visit);
          }
      }

      nodes.forEach(visit);
  }
  // Label prefix for a comment: the author's initials, or an empty string
  // when the comment has none. Also exported as exports.commentAuthorLabel.
  var commentAuthorLabel = exports.commentAuthorLabel = function commentAuthorLabel(comment) {
      var initials = comment.authorInitials;
      return initials ? initials : "";
  };