office-xml-reader.js 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. var _ = require("underscore");
  2. var promises = require("../promises");
  3. var xml = require("../xml");
  4. exports.read = read;
  5. exports.readXmlFromZipFile = readXmlFromZipFile;
  /**
   * Lookup table from XML namespace URI to a short prefix.
   *
   * It is handed to xml.readString() by read(). Both the "transitional"
   * (schemas.openxmlformats.org) and the "strict" (purl.oclc.org) URIs of a
   * namespace map to the same prefix, so the rest of this module can test a
   * single prefixed name (e.g. "mc:AlternateContent") regardless of which
   * flavour the document was saved in.
   */
  var xmlNamespaceMap = {
    // --- Transitional format ---
    "http://schemas.openxmlformats.org/wordprocessingml/2006/main": "w",
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships": "r",
    "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing": "wp",
    "http://schemas.openxmlformats.org/drawingml/2006/main": "a",
    "http://schemas.openxmlformats.org/drawingml/2006/picture": "pic",

    // --- Strict format (same prefixes as the transitional URIs above) ---
    "http://purl.oclc.org/ooxml/wordprocessingml/main": "w",
    "http://purl.oclc.org/ooxml/officeDocument/relationships": "r",
    "http://purl.oclc.org/ooxml/drawingml/wordprocessingDrawing": "wp",
    "http://purl.oclc.org/ooxml/drawingml/main": "a",
    "http://purl.oclc.org/ooxml/drawingml/picture": "pic",

    // --- Common to both formats ---
    "http://schemas.openxmlformats.org/package/2006/content-types": "content-types",
    "http://schemas.openxmlformats.org/package/2006/relationships": "relationships",
    "http://schemas.openxmlformats.org/markup-compatibility/2006": "mc",
    "urn:schemas-microsoft-com:vml": "v",
    "urn:schemas-microsoft-com:office:word": "office-word"
  };
  /**
   * Parse an XML string and return its root node with every
   * mc:AlternateContent element collapsed.
   *
   * @param {string} xmlString - XML text to parse.
   * @returns {Promise} Resolves to the first node produced by
   *     collapseAlternateContent() for the parsed document.
   */
  function read(xmlString) {
    var parsing = xml.readString(xmlString, xmlNamespaceMap);
    return parsing.then(function(rootElement) {
      var collapsed = collapseAlternateContent(rootElement);
      return collapsed[0];
    });
  }
  /**
   * Read and parse one XML entry of a zip archive.
   *
   * @param {Object} docxFile - Archive object exposing exists(path) and
   *     read(path, encoding).
   * @param {string} path - Path of the entry inside the archive.
   * @returns {Promise} Resolves to the parsed root node (see read()), or to
   *     null when the archive has no entry at that path.
   */
  function readXmlFromZipFile(docxFile, path) {
    // A missing entry is not an error: report it as null.
    if (!docxFile.exists(path)) {
      return promises.resolve(null);
    }

    var rawXml = docxFile.read(path, "utf-8");
    return rawXml.then(stripUtf8Bom).then(read);
  }
  /**
   * Remove a single leading byte-order mark (U+FEFF) from a string.
   *
   * @param {string} xmlString - Text that may start with a BOM.
   * @returns {string} The text without the leading BOM; unchanged when the
   *     text does not start with one.
   */
  function stripUtf8Bom(xmlString) {
    // Anchored at the start, so a U+FEFF elsewhere in the text is kept.
    var leadingBom = /^\uFEFF/g;
    var withoutBom = xmlString.replace(leadingBom, '');
    return withoutBom;
  }
  /**
   * Replace every mc:AlternateContent element in a node tree with the
   * children of its mc:Fallback element.
   *
   * Element nodes are modified in place: their `children` array is replaced
   * by the collapsed list.
   *
   * @param {Object} node - A node with a `type` property; element nodes also
   *     have `name`, `children` and a `first(name)` lookup method.
   * @returns {Array} The nodes that should take the place of `node` in its
   *     parent: `[node]` for anything that is not an mc:AlternateContent
   *     element, otherwise the fallback's children (an empty array when the
   *     element has no mc:Fallback child).
   */
  function collapseAlternateContent(node) {
    if (node.type !== "element") {
      return [node];
    }

    if (node.name === "mc:AlternateContent") {
      // An mc:AlternateContent element may lack an mc:Fallback child. Treat
      // that as "nothing to substitute" instead of dereferencing a missing
      // element. first() is assumed to return a falsy value when there is no
      // matching child -- TODO confirm against ../xml.
      var fallback = node.first("mc:Fallback");
      // NOTE(review): the fallback's children are returned as-is, so an
      // mc:AlternateContent nested inside a fallback is not collapsed.
      return fallback ? fallback.children : [];
    }

    // Each child collapses to a list of replacement nodes; splice those lists
    // together one level deep to form the new children array.
    var collapsedChildren = [];
    node.children.forEach(function(child) {
      collapseAlternateContent(child).forEach(function(replacement) {
        collapsedChildren.push(replacement);
      });
    });
    node.children = collapsedChildren;
    return [node];
  }