123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586 |
- 'use strict';
- var conventions = require('./conventions');
- var dom = require('./dom');
- var errors = require('./errors');
- var entities = require('./entities');
- var sax = require('./sax');
- var DOMImplementation = dom.DOMImplementation;
- var hasDefaultHTMLNamespace = conventions.hasDefaultHTMLNamespace;
- var isHTMLMimeType = conventions.isHTMLMimeType;
- var isValidMimeType = conventions.isValidMimeType;
- var MIME_TYPE = conventions.MIME_TYPE;
- var NAMESPACE = conventions.NAMESPACE;
- var ParseError = errors.ParseError;
- var XMLReader = sax.XMLReader;
/**
 * Replaces every line break variant listed in
 * <https://www.w3.org/TR/xml11/#sec-line-ends> (including some Unicode "newline"
 * characters) with a single LINE FEED (#xA):
 *
 * > To simplify the tasks of applications, the XML processor must behave
 * > as if it normalized all line breaks in external parsed entities (including the document
 * > entity) on input, before parsing, by translating the following to a single #xA character:
 * >
 * > 1. the two-character sequence #xD #xA,
 * > 2. the two-character sequence #xD #x85,
 * > 3. the single character #x85,
 * > 4. the single character #x2028,
 * > 5. the single character #x2029,
 * > 6. any #xD character that is not immediately followed by #xA or #x85.
 *
 * @param {string} input
 * The text to normalize.
 * @returns {string}
 * The text with all of the line breaks above replaced by `\n`.
 * @prettierignore
 */
function normalizeLineEndings(input) {
	// Rules 1 and 2: the two-character sequences have to be collapsed first,
	// otherwise each half would be turned into its own line feed by the second pass.
	var withoutPairs = input.replace(/\r[\n\u0085]/g, '\n');
	// Rules 3 to 6: every remaining single line break character.
	return withoutPairs.replace(/[\r\u0085\u2028\u2029]/g, '\n');
}
/**
 * @typedef Locator
 * @property {number} [columnNumber]
 * @property {number} [lineNumber]
 */
/**
 * @typedef DOMParserOptions
 * @property {typeof assign} [assign]
 * The method to use instead of `conventions.assign`, which is used to copy values from
 * `options` before they are used for parsing.
 * @property {typeof DOMHandler} [domHandler]
 * For internal testing: The class for creating an instance for handling events from the SAX
 * parser.
 * *****Warning: By configuring a faulty implementation, the specified behavior can completely
 * be broken.*****
 * @property {Function} [errorHandler]
 * DEPRECATED! Use `onError` instead.
 * @property {function(level:ErrorLevel, message:string, context: DOMHandler):void}
 * [onError]
 * A function invoked for every error that occurs during parsing.
 *
 * If it is not provided, all errors are reported to `console.error`
 * and only `fatalError`s are thrown as a `ParseError`,
 * which prevents any further processing.
 * If the provided method throws, a `ParseError` is thrown,
 * which prevents any further processing.
 *
 * Be aware that many `warning`s are considered an error that prevents further processing in
 * most implementations.
 * @property {boolean} [locator=true]
 * Configures if the nodes created during parsing will have a `lineNumber` and a `columnNumber`
 * attribute describing their location in the XML string.
 * Default is true.
 * @property {(string) => string} [normalizeLineEndings]
 * Used to replace line endings before parsing, defaults to the exported `normalizeLineEndings`,
 * which normalizes line endings according to <https://www.w3.org/TR/xml11/#sec-line-ends>,
 * including some Unicode "newline" characters.
 * @property {Object} [xmlns]
 * The XML namespaces that should be assumed when parsing.
 * The default namespace can be provided by the key that is the empty string.
 * When the `mimeType` for HTML, XHTML or SVG are passed to `parseFromString`,
 * the default namespace that will be used,
 * will be overridden according to the specification.
 * @see {@link normalizeLineEndings}
 */
/**
 * The DOMParser interface provides the ability to parse XML or HTML source code from a string
 * into a DOM `Document`.
 *
 * ***xmldom is different from the spec in that it allows an `options` parameter,
 * to control the behavior***.
 *
 * The passed `options` object is only read, never written to, so it can safely be shared
 * between parsers or be frozen.
 *
 * @class
 * @param {DOMParserOptions} [options]
 * @throws {TypeError}
 * If `options.errorHandler` is provided but is not a function.
 * @see https://developer.mozilla.org/en-US/docs/Web/API/DOMParser
 * @see https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsing-and-serialization
 */
function DOMParser(options) {
	var opts = options || {};

	/**
	 * The method to use instead of `conventions.assign`, which is used to copy values from
	 * `options` before they are used for parsing.
	 *
	 * @type {conventions.assign}
	 * @private
	 * @see {@link conventions.assign}
	 * @readonly
	 */
	this.assign = opts.assign || conventions.assign;

	/**
	 * For internal testing: The class for creating an instance for handling events from the SAX
	 * parser.
	 * *****Warning: By configuring a faulty implementation, the specified behavior can completely
	 * be broken*****.
	 *
	 * @type {typeof DOMHandler}
	 * @private
	 * @readonly
	 */
	this.domHandler = opts.domHandler || DOMHandler;

	/**
	 * A function that is invoked for every error that occurs during parsing.
	 *
	 * If it is not provided, all errors are reported to `console.error`
	 * and only `fatalError`s are thrown as a `ParseError`,
	 * which prevents any further processing.
	 * If the provided method throws, a `ParseError` is thrown,
	 * which prevents any further processing.
	 *
	 * Be aware that many `warning`s are considered an error that prevents further processing in
	 * most implementations.
	 *
	 * @type {function(level:ErrorLevel, message:string, context: DOMHandler):void}
	 * @see {@link onErrorStopParsing}
	 * @see {@link onWarningStopParsing}
	 */
	this.onError = opts.onError || opts.errorHandler;
	if (opts.errorHandler && typeof opts.errorHandler !== 'function') {
		throw new TypeError('errorHandler object is no longer supported, switch to onError!');
	} else if (opts.errorHandler) {
		// The deprecated handler itself is told about the deprecation, even if `onError` is also set.
		opts.errorHandler('warning', 'The `errorHandler` option has been deprecated, use `onError` instead!', this);
	}

	/**
	 * Used to replace line endings before parsing, defaults to `normalizeLineEndings`.
	 *
	 * @type {(string) => string}
	 * @readonly
	 */
	this.normalizeLineEndings = opts.normalizeLineEndings || normalizeLineEndings;

	/**
	 * Configures if the nodes created during parsing will have a `lineNumber` and a
	 * `columnNumber` attribute describing their location in the XML string.
	 * Default is true.
	 *
	 * @type {boolean}
	 * @readonly
	 */
	// The default is resolved locally instead of being written back onto the caller's object.
	this.locator = opts.locator === undefined ? true : !!opts.locator;

	/**
	 * The default namespace can be provided by the key that is the empty string.
	 * When the `mimeType` for HTML, XHTML or SVG are passed to `parseFromString`,
	 * the default namespace that will be used,
	 * will be overridden according to the specification.
	 *
	 * @type {Readonly<Object>}
	 * @readonly
	 */
	// Copied into a prototype-less object, so later changes to `options.xmlns` have no effect.
	this.xmlns = this.assign(Object.create(null), opts.xmlns);
}
/**
 * Parses `source` in the way configured by the `DOMParserOptions` of `this` `DOMParser`
 * and returns the `Document` built by the configured DOM handler.
 * The handler receives `mimeType` and decides from it whether an HTML or an XML `Document`
 * is created.
 *
 * __It behaves different from the description in the living standard__:
 * - Uses the `options` passed to the `DOMParser` constructor to modify the behavior.
 * - Any unexpected input is reported to `onError` with either a `warning`,
 * `error` or `fatalError` level.
 * - Any `fatalError` throws a `ParseError` which prevents further processing.
 * - Any error thrown by `onError` is converted to a `ParseError` which prevents further
 * processing.
 * - If the created `Document` has no `documentElement` after parsing,
 * it is reported as a `fatalError` ("missing root element").
 *
 * *****Warning: By configuring a faulty DOMHandler implementation,
 * the specified behavior can completely be broken*****.
 *
 * @param {string} source
 * The XML mime types only allow string input: anything else is reported as a `fatalError`.
 * @param {string} [mimeType='application/xml']
 * The mimeType or contentType of the document to be created determines the `type` of document
 * created (XML or HTML).
 * @returns {Document}
 * The `Document` node.
 * @throws {ParseError}
 * For any `fatalError` or anything that is thrown by `onError`.
 * @throws {TypeError}
 * For any invalid `mimeType`.
 * @see https://developer.mozilla.org/en-US/docs/Web/API/DOMParser/parseFromString
 * @see https://html.spec.whatwg.org/#dom-domparser-parsefromstring-dev
 */
DOMParser.prototype.parseFromString = function (source, mimeType) {
	if (!isValidMimeType(mimeType)) {
		throw new TypeError('DOMParser.parseFromString: the provided mimeType "' + mimeType + '" is not valid.');
	}

	// Work on a copy, so the namespaces configured on the parser are never modified.
	var namespaces = this.assign(Object.create(null), this.xmlns);
	var entityMap = entities.XML_ENTITIES;
	var defaultNamespace = namespaces[''] || null;
	if (hasDefaultHTMLNamespace(mimeType)) {
		// These mime types override the configured default namespace and use the HTML entities.
		entityMap = entities.HTML_ENTITIES;
		defaultNamespace = NAMESPACE.HTML;
	} else if (mimeType === MIME_TYPE.XML_SVG_IMAGE) {
		defaultNamespace = NAMESPACE.SVG;
	}
	namespaces[''] = defaultNamespace;
	// The `xml` prefix is always available, unless it has been configured explicitly.
	namespaces.xml = namespaces.xml || NAMESPACE.XML;

	var handler = new this.domHandler({
		mimeType: mimeType,
		defaultNamespace: defaultNamespace,
		onError: this.onError,
	});
	if (this.locator) {
		// The same object is shared with the handler, which stores it for positioning nodes.
		handler.setDocumentLocator({});
	}

	// The handler takes both roles: it builds the DOM and it receives the errors.
	var reader = new XMLReader();
	reader.errorHandler = handler;
	reader.domBuilder = handler;

	var isXml = !conventions.isHTMLMimeType(mimeType);
	if (isXml && typeof source !== 'string') {
		reader.errorHandler.fatalError('source is not a string');
	}
	reader.parse(this.normalizeLineEndings(String(source)), namespaces, entityMap);
	if (!handler.doc.documentElement) {
		reader.errorHandler.fatalError('missing root element');
	}
	return handler.doc;
};
/**
 * @typedef DOMHandlerOptions
 * @property {string} [mimeType=MIME_TYPE.XML_APPLICATION]
 * @property {string | null} [defaultNamespace=null]
 * @property {function(level:ErrorLevel, message:string, context: DOMHandler):void} [onError]
 */
/**
 * Receives the events emitted by the SAX parser and creates the related DOM nodes.
 *
 * Some methods are only implemented as an empty function,
 * since they are (at least currently) not relevant for xmldom.
 *
 * @class
 * @param {DOMHandlerOptions} [options]
 * @see http://www.saxproject.org/apidoc/org/xml/sax/ext/DefaultHandler2.html
 */
function DOMHandler(options) {
	var settings = options || {};
	var mimeType = settings.mimeType;
	var defaultNamespace = settings.defaultNamespace;

	/**
	 * The mime type is used to determine if the DOM handler will create an XML or HTML document.
	 * Only if it is set to `text/html` it will create an HTML document.
	 * It defaults to MIME_TYPE.XML_APPLICATION.
	 *
	 * @type {string}
	 * @see {@link MIME_TYPE}
	 * @readonly
	 */
	this.mimeType = mimeType || MIME_TYPE.XML_APPLICATION;

	/**
	 * The namespace to use to create an XML document.
	 * For the following reasons this is required:
	 * - The SAX API for `startDocument` doesn't offer any way to pass a namespace,
	 * since at that point there is no way for the parser to know what the default namespace from
	 * the document will be.
	 * - When creating using `DOMImplementation.createDocument` it is required to pass a
	 * namespace, to determine the correct `Document.contentType`,
	 * which should match `this.mimeType`.
	 * - When parsing an XML document with the `application/xhtml+xml` mimeType,
	 * the HTML namespace needs to be the default namespace.
	 *
	 * Any falsy value is stored as `null`.
	 *
	 * @type {string | null}
	 * @private
	 * @readonly
	 */
	this.defaultNamespace = defaultNamespace || null;

	/**
	 * `true` between `startCDATA` and `endCDATA`,
	 * which makes `characters` create CDATA sections instead of text nodes.
	 *
	 * @type {boolean}
	 * @private
	 */
	this.cdata = false;

	/**
	 * The last `Element` that was created by `startElement`.
	 * `endElement` sets it to the `currentElement.parentNode`.
	 *
	 * Note: The sax parser currently sets it to white space text nodes between tags.
	 *
	 * @type {Element | Node | undefined}
	 * @private
	 */
	this.currentElement = undefined;

	/**
	 * The Document that is created as part of `startDocument`,
	 * and returned by `DOMParser.parseFromString`.
	 *
	 * @type {Document | undefined}
	 * @readonly
	 */
	this.doc = undefined;

	/**
	 * The locator is stored as part of `setDocumentLocator`.
	 * It is controlled and mutated by the SAX parser to store the current parsing position.
	 * It is used by DOMHandler to set `columnNumber` and `lineNumber`
	 * on the DOM nodes.
	 *
	 * @type {Readonly<Locator> | undefined}
	 * @private
	 * @readonly (the sax parser currently sometimes sets it)
	 */
	this.locator = undefined;

	/**
	 * The callback every reported `warning`, `error` and `fatalError` is passed to.
	 *
	 * @type {function (level:ErrorLevel ,message:string, context:DOMHandler):void}
	 * @readonly
	 */
	this.onError = settings.onError;
}
/**
 * Copies the current parsing position from `locator` onto `node`,
 * by setting its `lineNumber` and `columnNumber` properties.
 *
 * @param {Locator} locator
 * The position to read from, it has to be an object.
 * @param {Node} node
 * The node that receives the position.
 */
function position(locator, node) {
	var line = locator.lineNumber;
	var column = locator.columnNumber;
	node.lineNumber = line;
	node.columnNumber = column;
}
DOMHandler.prototype = {
	/**
	 * Either creates an XML or an HTML document and stores it under `this.doc`.
	 * If it is an XML document, `this.defaultNamespace` is used to create it,
	 * and it will not contain any `childNodes`.
	 * If it is an HTML document, it will be created without any `childNodes`.
	 *
	 * @see http://www.saxproject.org/apidoc/org/xml/sax/ContentHandler.html
	 */
	startDocument: function () {
		var implementation = new DOMImplementation();
		if (isHTMLMimeType(this.mimeType)) {
			this.doc = implementation.createHTMLDocument(false);
		} else {
			this.doc = implementation.createDocument(this.defaultNamespace, '');
		}
	},
	/**
	 * Creates an element, appends it to the current element (or the document),
	 * makes it the new current element and adds all attributes to it.
	 *
	 * @param {string | null} namespaceURI
	 * @param {string} localName
	 * Only used as the element name when `qName` is falsy.
	 * @param {string} qName
	 * @param {Object} attrs
	 * The attribute list of the SAX parser, it is accessed using `length`, `getURI`,
	 * `getValue`, `getQName` and `getLocator`.
	 */
	startElement: function (namespaceURI, localName, qName, attrs) {
		var doc = this.doc;
		var element = doc.createElementNS(namespaceURI, qName || localName);
		var attributeCount = attrs.length;
		appendElement(this, element);
		this.currentElement = element;
		if (this.locator) {
			position(this.locator, element);
		}
		for (var index = 0; index < attributeCount; index++) {
			var attributeNamespace = attrs.getURI(index);
			var attributeValue = attrs.getValue(index);
			var attributeName = attrs.getQName(index);
			var attribute = doc.createAttributeNS(attributeNamespace, attributeName);
			if (this.locator) {
				// Every attribute has its own position.
				position(attrs.getLocator(index), attribute);
			}
			attribute.value = attribute.nodeValue = attributeValue;
			element.setAttributeNode(attribute);
		}
	},
	/**
	 * Makes the parent of the current element the new current element.
	 */
	endElement: function (namespaceURI, localName, qName) {
		this.currentElement = this.currentElement.parentNode;
	},
	// Intentionally empty, see the description of the class.
	startPrefixMapping: function (prefix, uri) {},
	// Intentionally empty, see the description of the class.
	endPrefixMapping: function (prefix) {},
	/**
	 * Creates a processing instruction and appends it to the current element (or the document).
	 *
	 * @param {string} target
	 * @param {string} data
	 */
	processingInstruction: function (target, data) {
		var instruction = this.doc.createProcessingInstruction(target, data);
		if (this.locator) {
			position(this.locator, instruction);
		}
		appendElement(this, instruction);
	},
	// Intentionally empty, see the description of the class.
	ignorableWhitespace: function (ch, start, length) {},
	/**
	 * Creates a text node, or a CDATA section between `startCDATA` and `endCDATA`,
	 * for the given part of `chars`. Nothing is created if that part is empty.
	 *
	 * Inside of an element the node is appended to it.
	 * Outside of any element the node is only appended to the document
	 * if it consists of nothing but white space, otherwise it is not attached anywhere.
	 *
	 * @param {string} chars
	 * @param {number} start
	 * @param {number} length
	 */
	characters: function (chars, start, length) {
		var text = _toString.apply(this, arguments);
		if (!text) {
			return;
		}
		var charNode = this.cdata ? this.doc.createCDATASection(text) : this.doc.createTextNode(text);
		if (this.currentElement) {
			this.currentElement.appendChild(charNode);
		} else if (/^\s*$/.test(text)) {
			this.doc.appendChild(charNode);
		}
		if (this.locator) {
			position(this.locator, charNode);
		}
	},
	// Intentionally empty, see the description of the class.
	skippedEntity: function (name) {},
	/**
	 * Normalizes the created document.
	 */
	endDocument: function () {
		this.doc.normalize();
	},
	/**
	 * Stores the locator to be able to set the `columnNumber` and `lineNumber`
	 * on the created DOM nodes.
	 * The `lineNumber` of a provided locator is reset to `0`.
	 *
	 * @param {Locator} locator
	 */
	setDocumentLocator: function (locator) {
		if (locator) {
			locator.lineNumber = 0;
		}
		this.locator = locator;
	},
	// LexicalHandler
	/**
	 * Creates a comment for the given part of `chars`
	 * and appends it to the current element (or the document).
	 *
	 * @param {string} chars
	 * @param {number} start
	 * @param {number} length
	 */
	comment: function (chars, start, length) {
		var text = _toString.apply(this, arguments);
		var commentNode = this.doc.createComment(text);
		if (this.locator) {
			position(this.locator, commentNode);
		}
		appendElement(this, commentNode);
	},
	/**
	 * Switches `characters` to creating CDATA sections.
	 */
	startCDATA: function () {
		this.cdata = true;
	},
	/**
	 * Switches `characters` back to creating text nodes.
	 */
	endCDATA: function () {
		this.cdata = false;
	},
	/**
	 * Creates the document type, appends it and stores it as `doctype` of the document.
	 * Does nothing if the implementation of the document is not able to create document types.
	 *
	 * @param {string} name
	 * @param {string} publicId
	 * @param {string} systemId
	 * @param {string} internalSubset
	 */
	startDTD: function (name, publicId, systemId, internalSubset) {
		var implementation = this.doc.implementation;
		if (!implementation || !implementation.createDocumentType) {
			return;
		}
		var doctype = implementation.createDocumentType(name, publicId, systemId, internalSubset);
		if (this.locator) {
			position(this.locator, doctype);
		}
		appendElement(this, doctype);
		this.doc.doctype = doctype;
	},
	/**
	 * Passes the problem to `this.onError` if it is a function,
	 * otherwise it is written to `console.error` together with the current position.
	 *
	 * @param {ErrorLevel} level
	 * @param {string} message
	 * @throws {ParseError}
	 * If `this.onError` throws, whatever it throws is made part of the message.
	 */
	reportError: function (level, message) {
		if (typeof this.onError !== 'function') {
			console.error('[xmldom ' + level + ']\t' + message, _locator(this.locator));
			return;
		}
		try {
			this.onError(level, message, this);
		} catch (e) {
			throw new ParseError('Reporting ' + level + ' "' + message + '" caused ' + e, this.locator);
		}
	},
	/**
	 * Reports `message` with the level `warning`.
	 *
	 * @param {string} message
	 * @see http://www.saxproject.org/apidoc/org/xml/sax/ErrorHandler.html
	 */
	warning: function (message) {
		this.reportError('warning', message);
	},
	/**
	 * Reports `message` with the level `error`.
	 *
	 * @param {string} message
	 */
	error: function (message) {
		this.reportError('error', message);
	},
	/**
	 * Reports `message` with the level `fatalError` and throws a `ParseError` afterwards.
	 *
	 * @param {string} message
	 * The message to be used for reporting and throwing the error.
	 * @returns {never}
	 * This function always throws an error and never returns a value.
	 * @throws {ParseError}
	 * Always: either the one caused by reporting, or one with the provided message
	 * and the current locator.
	 */
	fatalError: function (message) {
		this.reportError('fatalError', message);
		throw new ParseError(message, this.locator);
	},
};
/**
 * Formats a position for being appended to a message that is logged.
 *
 * @param {Locator} [l]
 * The position to format.
 * @returns {string}
 * A line break followed by the position as `@#[line:…,col:…]`,
 * or the empty string when there is no locator. It is never `undefined`,
 * so that logging the result does not print the literal text "undefined".
 */
function _locator(l) {
	if (!l) {
		return '';
	}
	return '\n@#[line:' + l.lineNumber + ',col:' + l.columnNumber + ']';
}
/**
 * Extracts the part of `chars` that a SAX event refers to.
 *
 * @param {string | Object} chars
 * Usually a string. Anything else is treated as a character array coming from a Java SAX
 * parser (xmldom running on Rhino).
 * @param {number} start
 * The index of the first character.
 * @param {number} length
 * The number of characters.
 * @returns {string | Object}
 * The requested part as a string, or `chars` itself if it is not a string
 * and no conversion is needed.
 */
function _toString(chars, start, length) {
	if (typeof chars === 'string') {
		// `substr` is used on purpose: it takes a length, exactly like the SAX events provide it.
		return chars.substr(start, length);
	}
	// java sax connect width xmldom on rhino(what about: "? && !(chars instanceof String)")
	var needsConversion = chars.length >= start + length || start;
	if (needsConversion) {
		// `java` is only available when running on Rhino.
		return new java.lang.String(chars, start, length) + '';
	}
	return chars;
}
/*
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/LexicalHandler.html
 * Methods of org.xml.sax.ext.LexicalHandler that are implemented by `DOMHandler.prototype`:
 * #comment(chars, start, length)
 * #startCDATA()
 * #endCDATA()
 * #startDTD(name, publicId, systemId, internalSubset)
 *
 * The methods in the following lists are IGNORED:
 * each of them is added to `DOMHandler.prototype` as a function
 * that does nothing and returns `null`.
 *
 * IGNORED methods of org.xml.sax.ext.LexicalHandler:
 * #endDTD()
 * #startEntity(name)
 * #endEntity(name)
 *
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/DeclHandler.html
 * IGNORED methods of org.xml.sax.ext.DeclHandler:
 * #attributeDecl(eName, aName, type, mode, value)
 * #elementDecl(name, model)
 * #externalEntityDecl(name, publicId, systemId)
 * #internalEntityDecl(name, value)
 *
 * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/EntityResolver2.html
 * IGNORED methods of org.xml.sax.EntityResolver2:
 * #resolveEntity(String name,String publicId,String baseURI,String systemId)
 * #resolveEntity(publicId, systemId)
 * #getExternalSubset(name, baseURI)
 *
 * @link http://www.saxproject.org/apidoc/org/xml/sax/DTDHandler.html
 * IGNORED methods of org.xml.sax.DTDHandler:
 * #notationDecl(name, publicId, systemId) {};
 * #unparsedEntityDecl(name, publicId, systemId, notationName) {};
 */
'endDTD,startEntity,endEntity,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,resolveEntity,getExternalSubset,notationDecl,unparsedEntityDecl'
	.split(',')
	.forEach(function (methodName) {
		// Every method gets its own function instance.
		DOMHandler.prototype[methodName] = function () {
			return null;
		};
	});
/* Private static helpers treated below as private instance methods, so don't need to add these to the public API; we might use a Relator to also get rid of non-standard public properties */
/**
 * Appends `node` to the current element of `handler`,
 * or to its document if there is no current element.
 *
 * @param {DOMHandler} handler
 * @param {Node} node
 */
function appendElement(handler, node) {
	var parent = handler.currentElement || handler.doc;
	parent.appendChild(node);
}
/**
 * An `onError` implementation that prevents any further parsing
 * as soon as something is reported with the level `error`.
 *
 * It does so by throwing the string `'onErrorStopParsing'`,
 * for any other level it does nothing.
 *
 * @param {ErrorLevel} level
 * The level of the reported problem.
 * @see {@link DOMParserOptions.onError}
 * @see {@link onWarningStopParsing}
 */
function onErrorStopParsing(level) {
	if (level !== 'error') {
		return;
	}
	throw 'onErrorStopParsing';
}
/**
 * An `onError` implementation that prevents any further parsing
 * as soon as anything is reported, no matter which level it has.
 *
 * It does so by always throwing the string `'onWarningStopParsing'`.
 *
 * @see {@link DOMParserOptions.onError}
 * @see {@link onErrorStopParsing}
 */
function onWarningStopParsing() {
	var reason = 'onWarningStopParsing';
	throw reason;
}
// Public API of this module.
// `__DOMHandler` is the handler class that `DOMParser` uses when no `domHandler` option is set.
exports.__DOMHandler = DOMHandler;
exports.DOMParser = DOMParser;
exports.normalizeLineEndings = normalizeLineEndings;
// Ready-made `onError` implementations.
exports.onErrorStopParsing = onErrorStopParsing;
exports.onWarningStopParsing = onWarningStopParsing;
|