index.js 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. var _ = require("underscore");
  2. var docxReader = require("./docx/docx-reader");
  3. var docxStyleMap = require("./docx/style-map");
  4. var DocumentConverter = require("./document-to-html").DocumentConverter;
  5. var convertElementToRawText = require("./raw-text").convertElementToRawText;
  6. var readStyle = require("./style-reader").readStyle;
  7. var readOptions = require("./options-reader").readOptions;
  8. var unzip = require("./unzip");
  9. var Result = require("./results").Result;
  // Conversion entry points (function declarations from this file).
  exports.convertToHtml = convertToHtml;
  exports.convertToMarkdown = convertToMarkdown;
  exports.convert = convert;
  exports.extractRawText = extractRawText;

  // Namespaces re-exported from sibling modules; they are loaded here, in this
  // order, at the moment this module is evaluated.
  exports.images = require("./images");
  exports.transforms = require("./transforms");
  exports.underline = require("./underline");

  // Writing and reading a style map stored inside a docx file.
  exports.embedStyleMap = embedStyleMap;
  exports.readEmbeddedStyleMap = readEmbeddedStyleMap;
  /**
   * Converts a docx input by handing it straight to `convert`.
   *
   * No option is added or changed here, so the output format is whatever
   * `convert` produces for the given options (presumably HTML by default;
   * confirm against the options reader).
   *
   * @param {*} input - Value accepted by `unzip.openZip` (forwarded untouched).
   * @param {Object} [options] - Conversion options, forwarded untouched.
   * @returns {*} Whatever `convert` returns (a promise-like chain).
   */
  function convertToHtml(input, options) {
      var conversion = convert(input, options);
      return conversion;
  }
  /**
   * Converts a docx input with the output format forced to "markdown".
   *
   * The caller's options object is never written to: a new object that
   * inherits from it (or from an empty object when no options are given) is
   * created, and `outputFormat` is set on that new object only.
   *
   * @param {*} input - Value accepted by `unzip.openZip` (forwarded untouched).
   * @param {Object} [options] - Conversion options used as the prototype of
   *     the options actually passed on.
   * @returns {*} Whatever `convert` returns (a promise-like chain).
   */
  function convertToMarkdown(input, options) {
      var inheritedOptions = options ? options : {};
      var markdownOverrides = Object.create(inheritedOptions);
      markdownOverrides.outputFormat = "markdown";
      return convert(input, markdownOverrides);
  }
  /**
   * Runs the full conversion pipeline for a docx input.
   *
   * Steps, in order:
   *   1. pass the options through `readOptions`;
   *   2. open the input with `unzip.openZip`;
   *   3. read the style map embedded in the archive and store it on the
   *      options as `embeddedStyleMap`;
   *   4. read the document, apply `options.transformDocument` to it, and
   *      hand the result to `convertDocumentToHtml`.
   *
   * @param {*} input - Value accepted by `unzip.openZip`; also passed as the
   *     second argument to `docxReader.read`.
   * @param {Object} [options] - Raw conversion options.
   * @returns {*} The promise chain started by `unzip.openZip`, resolving to
   *     the value produced by `convertDocumentToHtml`.
   */
  function convert(input, options) {
      var settings = readOptions(options);

      // Side-effect step: remember the archive's embedded style map on the
      // settings object so the later stages can see it.
      function rememberEmbeddedStyleMap(docxFile) {
          return docxStyleMap.readStyleMap(docxFile).then(function(embeddedStyleMap) {
              settings.embeddedStyleMap = embeddedStyleMap;
          });
      }

      function applyDocumentTransform(documentResult) {
          return documentResult.map(settings.transformDocument);
      }

      function renderDocument(documentResult) {
          return convertDocumentToHtml(documentResult, settings);
      }

      function readAndRender(docxFile) {
          return docxReader.read(docxFile, input)
              .then(applyDocumentTransform)
              .then(renderDocument);
      }

      // NOTE(review): `.tap` is not a standard Promise method; this relies on
      // `unzip.openZip` returning a promise that provides it (presumably
      // Bluebird) and on it passing the opened file through to the next step.
      return unzip.openZip(input)
          .tap(rememberEmbeddedStyleMap)
          .then(readAndRender);
  }
  /**
   * Opens a docx input and reads the style map stored inside it.
   *
   * @param {*} input - Value accepted by `unzip.openZip`.
   * @returns {*} Promise chain resolving to whatever
   *     `docxStyleMap.readStyleMap` yields for the opened archive.
   */
  function readEmbeddedStyleMap(input) {
      var openedArchive = unzip.openZip(input);
      // The reader is passed by reference, so it is invoked with the opened
      // archive as its only argument.
      return openedArchive.then(docxStyleMap.readStyleMap);
  }
  /**
   * Converts an already-read document using the style map from the options.
   *
   * The style map returned by `options.readStyleMap()` is parsed with
   * `parseStyleMap`; its value replaces `styleMap` in a copy of the options,
   * and that copy configures the `DocumentConverter`. Both the document
   * result and the style-map result are chained with `flatMapThen` before
   * the converter runs.
   *
   * @param {Object} documentResult - Result wrapping the document; must
   *     provide `flatMapThen`.
   * @param {Object} options - Options providing `readStyleMap()`.
   * @returns {*} Whatever `documentResult.flatMapThen` returns.
   */
  function convertDocumentToHtml(documentResult, options) {
      var parsedStyleMap = parseStyleMap(options.readStyleMap());
      var styleMapOverride = {
          styleMap: parsedStyleMap.value
      };
      var converterOptions = _.extend({}, options, styleMapOverride);
      var converter = new DocumentConverter(converterOptions);

      function convertWithStyleMap(doc) {
          // The callback's argument is not used: the converter already
          // received the parsed style map through its options.
          return parsedStyleMap.flatMapThen(function(unusedStyleMap) {
              return converter.convertToHtml(doc);
          });
      }

      return documentResult.flatMapThen(convertWithStyleMap);
  }
  /**
   * Parses every entry of a style map and merges the outcomes into one result.
   *
   * Each entry is passed to `readStyle`, the individual results are merged
   * with `Result.combine`, and falsy values are removed from the merged list.
   *
   * @param {Array} [styleMap] - Raw style mappings; a falsy value is treated
   *     as an empty list.
   * @returns {Object} Result whose value is the list of truthy parsed mappings.
   */
  function parseStyleMap(styleMap) {
      var rawMappings = styleMap || [];
      var readResults = rawMappings.map(readStyle);
      var combinedResult = Result.combine(readResults);
      return combinedResult.map(function(parsedMappings) {
          // Keep only the mappings that parsed to a truthy value.
          return parsedMappings.filter(Boolean);
      });
  }
  /**
   * Reads a docx input and converts the document to raw text.
   *
   * @param {*} input - Value accepted by `unzip.openZip`.
   * @returns {*} Promise chain resolving to the document result mapped
   *     through `convertElementToRawText`.
   */
  function extractRawText(input) {
      function toRawText(documentResult) {
          return documentResult.map(convertElementToRawText);
      }

      var openedArchive = unzip.openZip(input);
      // The reader is passed by reference, so it receives only the opened
      // archive (no second argument).
      return openedArchive
          .then(docxReader.read)
          .then(toRawText);
  }
  /**
   * Writes a style map into a docx input and returns the updated archive.
   *
   * @param {*} input - Value accepted by `unzip.openZip`.
   * @param {*} styleMap - Style map handed to `docxStyleMap.writeStyleMap`.
   * @returns {*} Promise chain resolving to an object with two accessors:
   *     `toArrayBuffer()` returns the serialized archive as produced by
   *     `docxFile.toArrayBuffer()`, and `toBuffer()` returns
   *     `Buffer.from(...)` of that same array buffer.
   */
  function embedStyleMap(input, styleMap) {
      function writeStyleMapInto(docxFile) {
          return docxStyleMap.writeStyleMap(docxFile, styleMap);
      }

      function serializeArchive(docxFile) {
          return docxFile.toArrayBuffer();
      }

      function wrapSerializedArchive(arrayBuffer) {
          var serializedDocx = {
              toArrayBuffer: function() {
                  return arrayBuffer;
              },
              toBuffer: function() {
                  return Buffer.from(arrayBuffer);
              }
          };
          return serializedDocx;
      }

      // NOTE(review): `.tap` is not a standard Promise method; this relies on
      // the promise from `unzip.openZip` providing it (presumably Bluebird)
      // and passing the opened file on to the serialization step.
      return unzip.openZip(input)
          .tap(writeStyleMapInto)
          .then(serializeArchive)
          .then(wrapSerializedArchive);
  }
  95. exports.styleMapping = function() {
  96. throw new Error('Use a raw string instead of mammoth.styleMapping e.g. "p[style-name=\'Title\'] => h1" instead of mammoth.styleMapping("p[style-name=\'Title\'] => h1")');
  97. };