docx-reader.js 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. exports.read = read;
  2. exports._findPartPaths = findPartPaths;
  3. var promises = require("../promises");
  4. var documents = require("../documents");
  5. var Result = require("../results").Result;
  6. var zipfile = require("../zipfile");
  7. var readXmlFromZipFile = require("./office-xml-reader").readXmlFromZipFile;
  8. var createBodyReader = require("./body-reader").createBodyReader;
  9. var DocumentXmlReader = require("./document-xml-reader").DocumentXmlReader;
  10. var relationshipsReader = require("./relationships-reader");
  11. var contentTypesReader = require("./content-types-reader");
  12. var numberingXml = require("./numbering-xml");
  13. var stylesReader = require("./styles-reader");
  14. var notesReader = require("./notes-reader");
  15. var commentsReader = require("./comments-reader");
  16. var Files = require("./files").Files;
  17. function read(docxFile, input) {
  18. input = input || {};
  19. return promises.props({
  20. contentTypes: readContentTypesFromZipFile(docxFile),
  21. partPaths: findPartPaths(docxFile),
  22. docxFile: docxFile,
  23. files: input.path ? Files.relativeToFile(input.path) : new Files(null)
  24. }).also(function(result) {
  25. return {
  26. styles: readStylesFromZipFile(docxFile, result.partPaths.styles)
  27. };
  28. }).also(function(result) {
  29. return {
  30. numbering: readNumberingFromZipFile(docxFile, result.partPaths.numbering, result.styles)
  31. };
  32. }).also(function(result) {
  33. return {
  34. footnotes: readXmlFileWithBody(result.partPaths.footnotes, result, function(bodyReader, xml) {
  35. if (xml) {
  36. return notesReader.createFootnotesReader(bodyReader)(xml);
  37. } else {
  38. return new Result([]);
  39. }
  40. }),
  41. endnotes: readXmlFileWithBody(result.partPaths.endnotes, result, function(bodyReader, xml) {
  42. if (xml) {
  43. return notesReader.createEndnotesReader(bodyReader)(xml);
  44. } else {
  45. return new Result([]);
  46. }
  47. }),
  48. comments: readXmlFileWithBody(result.partPaths.comments, result, function(bodyReader, xml) {
  49. if (xml) {
  50. return commentsReader.createCommentsReader(bodyReader)(xml);
  51. } else {
  52. return new Result([]);
  53. }
  54. })
  55. };
  56. }).also(function(result) {
  57. return {
  58. notes: result.footnotes.flatMap(function(footnotes) {
  59. return result.endnotes.map(function(endnotes) {
  60. return new documents.Notes(footnotes.concat(endnotes));
  61. });
  62. })
  63. };
  64. }).then(function(result) {
  65. return readXmlFileWithBody(result.partPaths.mainDocument, result, function(bodyReader, xml) {
  66. return result.notes.flatMap(function(notes) {
  67. return result.comments.flatMap(function(comments) {
  68. var reader = new DocumentXmlReader({
  69. bodyReader: bodyReader,
  70. notes: notes,
  71. comments: comments
  72. });
  73. return reader.convertXmlToDocument(xml);
  74. });
  75. });
  76. });
  77. });
  78. }
  79. function findPartPaths(docxFile) {
  80. return readPackageRelationships(docxFile).then(function(packageRelationships) {
  81. var mainDocumentPath = findPartPath({
  82. docxFile: docxFile,
  83. relationships: packageRelationships,
  84. relationshipType: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
  85. basePath: "",
  86. fallbackPath: "word/document.xml"
  87. });
  88. if (!docxFile.exists(mainDocumentPath)) {
  89. throw new Error("Could not find main document part. Are you sure this is a valid .docx file?");
  90. }
  91. return xmlFileReader({
  92. filename: relationshipsFilename(mainDocumentPath),
  93. readElement: relationshipsReader.readRelationships,
  94. defaultValue: relationshipsReader.defaultValue
  95. })(docxFile).then(function(documentRelationships) {
  96. function findPartRelatedToMainDocument(name) {
  97. return findPartPath({
  98. docxFile: docxFile,
  99. relationships: documentRelationships,
  100. relationshipType: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/" + name,
  101. basePath: zipfile.splitPath(mainDocumentPath).dirname,
  102. fallbackPath: "word/" + name + ".xml"
  103. });
  104. }
  105. return {
  106. mainDocument: mainDocumentPath,
  107. comments: findPartRelatedToMainDocument("comments"),
  108. endnotes: findPartRelatedToMainDocument("endnotes"),
  109. footnotes: findPartRelatedToMainDocument("footnotes"),
  110. numbering: findPartRelatedToMainDocument("numbering"),
  111. styles: findPartRelatedToMainDocument("styles")
  112. };
  113. });
  114. });
  115. }
  116. function findPartPath(options) {
  117. var docxFile = options.docxFile;
  118. var relationships = options.relationships;
  119. var relationshipType = options.relationshipType;
  120. var basePath = options.basePath;
  121. var fallbackPath = options.fallbackPath;
  122. var targets = relationships.findTargetsByType(relationshipType);
  123. var normalisedTargets = targets.map(function(target) {
  124. return stripPrefix(zipfile.joinPath(basePath, target), "/");
  125. });
  126. var validTargets = normalisedTargets.filter(function(target) {
  127. return docxFile.exists(target);
  128. });
  129. if (validTargets.length === 0) {
  130. return fallbackPath;
  131. } else {
  132. return validTargets[0];
  133. }
  134. }
  135. function stripPrefix(value, prefix) {
  136. if (value.substring(0, prefix.length) === prefix) {
  137. return value.substring(prefix.length);
  138. } else {
  139. return value;
  140. }
  141. }
  142. function xmlFileReader(options) {
  143. return function(zipFile) {
  144. return readXmlFromZipFile(zipFile, options.filename)
  145. .then(function(element) {
  146. return element ? options.readElement(element) : options.defaultValue;
  147. });
  148. };
  149. }
  150. function readXmlFileWithBody(filename, options, func) {
  151. var readRelationshipsFromZipFile = xmlFileReader({
  152. filename: relationshipsFilename(filename),
  153. readElement: relationshipsReader.readRelationships,
  154. defaultValue: relationshipsReader.defaultValue
  155. });
  156. return readRelationshipsFromZipFile(options.docxFile).then(function(relationships) {
  157. var bodyReader = new createBodyReader({
  158. relationships: relationships,
  159. contentTypes: options.contentTypes,
  160. docxFile: options.docxFile,
  161. numbering: options.numbering,
  162. styles: options.styles,
  163. files: options.files
  164. });
  165. return readXmlFromZipFile(options.docxFile, filename)
  166. .then(function(xml) {
  167. return func(bodyReader, xml);
  168. });
  169. });
  170. }
  171. function relationshipsFilename(filename) {
  172. var split = zipfile.splitPath(filename);
  173. return zipfile.joinPath(split.dirname, "_rels", split.basename + ".rels");
  174. }
  175. var readContentTypesFromZipFile = xmlFileReader({
  176. filename: "[Content_Types].xml",
  177. readElement: contentTypesReader.readContentTypesFromXml,
  178. defaultValue: contentTypesReader.defaultContentTypes
  179. });
  180. function readNumberingFromZipFile(zipFile, path, styles) {
  181. return xmlFileReader({
  182. filename: path,
  183. readElement: function(element) {
  184. return numberingXml.readNumberingXml(element, {styles: styles});
  185. },
  186. defaultValue: numberingXml.defaultNumbering
  187. })(zipFile);
  188. }
  189. function readStylesFromZipFile(zipFile, path) {
  190. return xmlFileReader({
  191. filename: path,
  192. readElement: stylesReader.readStylesXml,
  193. defaultValue: stylesReader.defaultStyles
  194. })(zipFile);
  195. }
  196. var readPackageRelationships = xmlFileReader({
  197. filename: "_rels/.rels",
  198. readElement: relationshipsReader.readRelationships,
  199. defaultValue: relationshipsReader.defaultValue
  200. });