123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929 |
- 'use strict';
- var conventions = require('./conventions');
- var g = require('./grammar');
- var errors = require('./errors');
- var isHTMLEscapableRawTextElement = conventions.isHTMLEscapableRawTextElement;
- var isHTMLMimeType = conventions.isHTMLMimeType;
- var isHTMLRawTextElement = conventions.isHTMLRawTextElement;
- var hasOwn = conventions.hasOwn;
- var NAMESPACE = conventions.NAMESPACE;
- var ParseError = errors.ParseError;
- var DOMException = errors.DOMException;
- //var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',')
- //S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
- //S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
- var S_TAG = 0; //tag name offerring
- var S_ATTR = 1; //attr name offerring
- var S_ATTR_SPACE = 2; //attr name end and space offer
- var S_EQ = 3; //=space?
- var S_ATTR_NOQUOT_VALUE = 4; //attr value(no quot value only)
- var S_ATTR_END = 5; //attr value end and no space(quot end)
- var S_TAG_SPACE = 6; //(attr value end || tag end ) && (space offer)
- var S_TAG_CLOSE = 7; //closed el<el />
- function XMLReader() {}
- XMLReader.prototype = {
- parse: function (source, defaultNSMap, entityMap) {
- var domBuilder = this.domBuilder;
- domBuilder.startDocument();
- _copy(defaultNSMap, (defaultNSMap = Object.create(null)));
- parse(source, defaultNSMap, entityMap, domBuilder, this.errorHandler);
- domBuilder.endDocument();
- },
- };
- /**
- * Detecting everything that might be a reference,
- * including those without ending `;`, since those are allowed in HTML.
- * The entityReplacer takes care of verifying and transforming each occurrence,
- * and reports to the errorHandler on those that are not OK,
- * depending on the context.
- */
- var ENTITY_REG = /&#?\w+;?/g;
- function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
- var isHTML = isHTMLMimeType(domBuilder.mimeType);
- if (source.indexOf(g.UNICODE_REPLACEMENT_CHARACTER) >= 0) {
- errorHandler.warning('Unicode replacement character detected, source encoding issues?');
- }
- function fixedFromCharCode(code) {
- // String.prototype.fromCharCode does not supports
- // > 2 bytes unicode chars directly
- if (code > 0xffff) {
- code -= 0x10000;
- var surrogate1 = 0xd800 + (code >> 10),
- surrogate2 = 0xdc00 + (code & 0x3ff);
- return String.fromCharCode(surrogate1, surrogate2);
- } else {
- return String.fromCharCode(code);
- }
- }
- function entityReplacer(a) {
- var complete = a[a.length - 1] === ';' ? a : a + ';';
- if (!isHTML && complete !== a) {
- errorHandler.error('EntityRef: expecting ;');
- return a;
- }
- var match = g.Reference.exec(complete);
- if (!match || match[0].length !== complete.length) {
- errorHandler.error('entity not matching Reference production: ' + a);
- return a;
- }
- var k = complete.slice(1, -1);
- if (hasOwn(entityMap, k)) {
- return entityMap[k];
- } else if (k.charAt(0) === '#') {
- return fixedFromCharCode(parseInt(k.substring(1).replace('x', '0x')));
- } else {
- errorHandler.error('entity not found:' + a);
- return a;
- }
- }
- function appendText(end) {
- //has some bugs
- if (end > start) {
- var xt = source.substring(start, end).replace(ENTITY_REG, entityReplacer);
- locator && position(start);
- domBuilder.characters(xt, 0, end - start);
- start = end;
- }
- }
- var lineStart = 0;
- var lineEnd = 0;
- var linePattern = /\r\n?|\n|$/g;
- var locator = domBuilder.locator;
- function position(p, m) {
- while (p >= lineEnd && (m = linePattern.exec(source))) {
- lineStart = lineEnd;
- lineEnd = m.index + m[0].length;
- locator.lineNumber++;
- }
- locator.columnNumber = p - lineStart + 1;
- }
- var parseStack = [{ currentNSMap: defaultNSMapCopy }];
- var unclosedTags = [];
- var start = 0;
- while (true) {
- try {
- var tagStart = source.indexOf('<', start);
- if (tagStart < 0) {
- if (!isHTML && unclosedTags.length > 0) {
- return errorHandler.fatalError('unclosed xml tag(s): ' + unclosedTags.join(', '));
- }
- if (!source.substring(start).match(/^\s*$/)) {
- var doc = domBuilder.doc;
- var text = doc.createTextNode(source.substring(start));
- if (doc.documentElement) {
- return errorHandler.error('Extra content at the end of the document');
- }
- doc.appendChild(text);
- domBuilder.currentElement = text;
- }
- return;
- }
- if (tagStart > start) {
- var fromSource = source.substring(start, tagStart);
- if (!isHTML && unclosedTags.length === 0) {
- fromSource = fromSource.replace(new RegExp(g.S_OPT.source, 'g'), '');
- fromSource && errorHandler.error("Unexpected content outside root element: '" + fromSource + "'");
- }
- appendText(tagStart);
- }
- switch (source.charAt(tagStart + 1)) {
- case '/':
- var end = source.indexOf('>', tagStart + 2);
- var tagNameRaw = source.substring(tagStart + 2, end > 0 ? end : undefined);
- if (!tagNameRaw) {
- return errorHandler.fatalError('end tag name missing');
- }
- var tagNameMatch = end > 0 && g.reg('^', g.QName_group, g.S_OPT, '$').exec(tagNameRaw);
- if (!tagNameMatch) {
- return errorHandler.fatalError('end tag name contains invalid characters: "' + tagNameRaw + '"');
- }
- if (!domBuilder.currentElement && !domBuilder.doc.documentElement) {
- // not enough information to provide a helpful error message,
- // but parsing will throw since there is no root element
- return;
- }
- var currentTagName =
- unclosedTags[unclosedTags.length - 1] ||
- domBuilder.currentElement.tagName ||
- domBuilder.doc.documentElement.tagName ||
- '';
- if (currentTagName !== tagNameMatch[1]) {
- var tagNameLower = tagNameMatch[1].toLowerCase();
- if (!isHTML || currentTagName.toLowerCase() !== tagNameLower) {
- return errorHandler.fatalError('Opening and ending tag mismatch: "' + currentTagName + '" != "' + tagNameRaw + '"');
- }
- }
- var config = parseStack.pop();
- unclosedTags.pop();
- var localNSMap = config.localNSMap;
- domBuilder.endElement(config.uri, config.localName, currentTagName);
- if (localNSMap) {
- for (var prefix in localNSMap) {
- if (hasOwn(localNSMap, prefix)) {
- domBuilder.endPrefixMapping(prefix);
- }
- }
- }
- end++;
- break;
- // end element
- case '?': // <?...?>
- locator && position(tagStart);
- end = parseProcessingInstruction(source, tagStart, domBuilder, errorHandler);
- break;
- case '!': // <!doctype,<![CDATA,<!--
- locator && position(tagStart);
- end = parseDoctypeCommentOrCData(source, tagStart, domBuilder, errorHandler, isHTML);
- break;
- default:
- locator && position(tagStart);
- var el = new ElementAttributes();
- var currentNSMap = parseStack[parseStack.length - 1].currentNSMap;
- //elStartEnd
- var end = parseElementStartPart(source, tagStart, el, currentNSMap, entityReplacer, errorHandler, isHTML);
- var len = el.length;
- if (!el.closed) {
- if (isHTML && conventions.isHTMLVoidElement(el.tagName)) {
- el.closed = true;
- } else {
- unclosedTags.push(el.tagName);
- }
- }
- if (locator && len) {
- var locator2 = copyLocator(locator, {});
- //try{//attribute position fixed
- for (var i = 0; i < len; i++) {
- var a = el[i];
- position(a.offset);
- a.locator = copyLocator(locator, {});
- }
- domBuilder.locator = locator2;
- if (appendElement(el, domBuilder, currentNSMap)) {
- parseStack.push(el);
- }
- domBuilder.locator = locator;
- } else {
- if (appendElement(el, domBuilder, currentNSMap)) {
- parseStack.push(el);
- }
- }
- if (isHTML && !el.closed) {
- end = parseHtmlSpecialContent(source, end, el.tagName, entityReplacer, domBuilder);
- } else {
- end++;
- }
- }
- } catch (e) {
- if (e instanceof ParseError) {
- throw e;
- } else if (e instanceof DOMException) {
- throw new ParseError(e.name + ': ' + e.message, domBuilder.locator, e);
- }
- errorHandler.error('element parse error: ' + e);
- end = -1;
- }
- if (end > start) {
- start = end;
- } else {
- //Possible sax fallback here, risk of positional error
- appendText(Math.max(tagStart, start) + 1);
- }
- }
- }
- function copyLocator(f, t) {
- t.lineNumber = f.lineNumber;
- t.columnNumber = f.columnNumber;
- return t;
- }
- /**
- * @returns
- * end of the elementStartPart(end of elementEndPart for selfClosed el)
- * @see {@link #appendElement}
- */
- function parseElementStartPart(source, start, el, currentNSMap, entityReplacer, errorHandler, isHTML) {
- /**
- * @param {string} qname
- * @param {string} value
- * @param {number} startIndex
- */
- function addAttribute(qname, value, startIndex) {
- if (hasOwn(el.attributeNames, qname)) {
- return errorHandler.fatalError('Attribute ' + qname + ' redefined');
- }
- if (!isHTML && value.indexOf('<') >= 0) {
- return errorHandler.fatalError("Unescaped '<' not allowed in attributes values");
- }
- el.addValue(
- qname,
- // @see https://www.w3.org/TR/xml/#AVNormalize
- // since the xmldom sax parser does not "interpret" DTD the following is not implemented:
- // - recursive replacement of (DTD) entity references
- // - trimming and collapsing multiple spaces into a single one for attributes that are not of type CDATA
- value.replace(/[\t\n\r]/g, ' ').replace(ENTITY_REG, entityReplacer),
- startIndex
- );
- }
- var attrName;
- var value;
- var p = ++start;
- var s = S_TAG; //status
- while (true) {
- var c = source.charAt(p);
- switch (c) {
- case '=':
- if (s === S_ATTR) {
- //attrName
- attrName = source.slice(start, p);
- s = S_EQ;
- } else if (s === S_ATTR_SPACE) {
- s = S_EQ;
- } else {
- //fatalError: equal must after attrName or space after attrName
- throw new Error('attribute equal must after attrName'); // No known test case
- }
- break;
- case "'":
- case '"':
- if (
- s === S_EQ ||
- s === S_ATTR //|| s == S_ATTR_SPACE
- ) {
- //equal
- if (s === S_ATTR) {
- errorHandler.warning('attribute value must after "="');
- attrName = source.slice(start, p);
- }
- start = p + 1;
- p = source.indexOf(c, start);
- if (p > 0) {
- value = source.slice(start, p);
- addAttribute(attrName, value, start - 1);
- s = S_ATTR_END;
- } else {
- //fatalError: no end quot match
- throw new Error("attribute value no end '" + c + "' match");
- }
- } else if (s == S_ATTR_NOQUOT_VALUE) {
- value = source.slice(start, p);
- addAttribute(attrName, value, start);
- errorHandler.warning('attribute "' + attrName + '" missed start quot(' + c + ')!!');
- start = p + 1;
- s = S_ATTR_END;
- } else {
- //fatalError: no equal before
- throw new Error('attribute value must after "="'); // No known test case
- }
- break;
- case '/':
- switch (s) {
- case S_TAG:
- el.setTagName(source.slice(start, p));
- case S_ATTR_END:
- case S_TAG_SPACE:
- case S_TAG_CLOSE:
- s = S_TAG_CLOSE;
- el.closed = true;
- case S_ATTR_NOQUOT_VALUE:
- case S_ATTR:
- break;
- case S_ATTR_SPACE:
- el.closed = true;
- break;
- //case S_EQ:
- default:
- throw new Error("attribute invalid close char('/')"); // No known test case
- }
- break;
- case '': //end document
- errorHandler.error('unexpected end of input');
- if (s == S_TAG) {
- el.setTagName(source.slice(start, p));
- }
- return p;
- case '>':
- switch (s) {
- case S_TAG:
- el.setTagName(source.slice(start, p));
- case S_ATTR_END:
- case S_TAG_SPACE:
- case S_TAG_CLOSE:
- break; //normal
- case S_ATTR_NOQUOT_VALUE: //Compatible state
- case S_ATTR:
- value = source.slice(start, p);
- if (value.slice(-1) === '/') {
- el.closed = true;
- value = value.slice(0, -1);
- }
- case S_ATTR_SPACE:
- if (s === S_ATTR_SPACE) {
- value = attrName;
- }
- if (s == S_ATTR_NOQUOT_VALUE) {
- errorHandler.warning('attribute "' + value + '" missed quot(")!');
- addAttribute(attrName, value, start);
- } else {
- if (!isHTML) {
- errorHandler.warning('attribute "' + value + '" missed value!! "' + value + '" instead!!');
- }
- addAttribute(value, value, start);
- }
- break;
- case S_EQ:
- if (!isHTML) {
- return errorHandler.fatalError('AttValue: \' or " expected');
- }
- }
- return p;
- /*xml space '\x20' | #x9 | #xD | #xA; */
- case '\u0080':
- c = ' ';
- default:
- if (c <= ' ') {
- //space
- switch (s) {
- case S_TAG:
- el.setTagName(source.slice(start, p)); //tagName
- s = S_TAG_SPACE;
- break;
- case S_ATTR:
- attrName = source.slice(start, p);
- s = S_ATTR_SPACE;
- break;
- case S_ATTR_NOQUOT_VALUE:
- var value = source.slice(start, p);
- errorHandler.warning('attribute "' + value + '" missed quot(")!!');
- addAttribute(attrName, value, start);
- case S_ATTR_END:
- s = S_TAG_SPACE;
- break;
- //case S_TAG_SPACE:
- //case S_EQ:
- //case S_ATTR_SPACE:
- // void();break;
- //case S_TAG_CLOSE:
- //ignore warning
- }
- } else {
- //not space
- //S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
- //S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
- switch (s) {
- //case S_TAG:void();break;
- //case S_ATTR:void();break;
- //case S_ATTR_NOQUOT_VALUE:void();break;
- case S_ATTR_SPACE:
- if (!isHTML) {
- errorHandler.warning('attribute "' + attrName + '" missed value!! "' + attrName + '" instead2!!');
- }
- addAttribute(attrName, attrName, start);
- start = p;
- s = S_ATTR;
- break;
- case S_ATTR_END:
- errorHandler.warning('attribute space is required"' + attrName + '"!!');
- case S_TAG_SPACE:
- s = S_ATTR;
- start = p;
- break;
- case S_EQ:
- s = S_ATTR_NOQUOT_VALUE;
- start = p;
- break;
- case S_TAG_CLOSE:
- throw new Error("elements closed character '/' and '>' must be connected to");
- }
- }
- } //end outer switch
- p++;
- }
- }
- /**
- * @returns
- * `true` if a new namespace has been defined.
- */
- function appendElement(el, domBuilder, currentNSMap) {
- var tagName = el.tagName;
- var localNSMap = null;
- var i = el.length;
- while (i--) {
- var a = el[i];
- var qName = a.qName;
- var value = a.value;
- var nsp = qName.indexOf(':');
- if (nsp > 0) {
- var prefix = (a.prefix = qName.slice(0, nsp));
- var localName = qName.slice(nsp + 1);
- var nsPrefix = prefix === 'xmlns' && localName;
- } else {
- localName = qName;
- prefix = null;
- nsPrefix = qName === 'xmlns' && '';
- }
- //can not set prefix,because prefix !== ''
- a.localName = localName;
- //prefix == null for no ns prefix attribute
- if (nsPrefix !== false) {
- //hack!!
- if (localNSMap == null) {
- localNSMap = Object.create(null);
- _copy(currentNSMap, (currentNSMap = Object.create(null)));
- }
- currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value;
- a.uri = NAMESPACE.XMLNS;
- domBuilder.startPrefixMapping(nsPrefix, value);
- }
- }
- var i = el.length;
- while (i--) {
- a = el[i];
- if (a.prefix) {
- //no prefix attribute has no namespace
- if (a.prefix === 'xml') {
- a.uri = NAMESPACE.XML;
- }
- if (a.prefix !== 'xmlns') {
- a.uri = currentNSMap[a.prefix];
- }
- }
- }
- var nsp = tagName.indexOf(':');
- if (nsp > 0) {
- prefix = el.prefix = tagName.slice(0, nsp);
- localName = el.localName = tagName.slice(nsp + 1);
- } else {
- prefix = null; //important!!
- localName = el.localName = tagName;
- }
- //no prefix element has default namespace
- var ns = (el.uri = currentNSMap[prefix || '']);
- domBuilder.startElement(ns, localName, tagName, el);
- //endPrefixMapping and startPrefixMapping have not any help for dom builder
- //localNSMap = null
- if (el.closed) {
- domBuilder.endElement(ns, localName, tagName);
- if (localNSMap) {
- for (prefix in localNSMap) {
- if (hasOwn(localNSMap, prefix)) {
- domBuilder.endPrefixMapping(prefix);
- }
- }
- }
- } else {
- el.currentNSMap = currentNSMap;
- el.localNSMap = localNSMap;
- //parseStack.push(el);
- return true;
- }
- }
- function parseHtmlSpecialContent(source, elStartEnd, tagName, entityReplacer, domBuilder) {
- // https://html.spec.whatwg.org/#raw-text-elements
- // https://html.spec.whatwg.org/#escapable-raw-text-elements
- // https://html.spec.whatwg.org/#cdata-rcdata-restrictions:raw-text-elements
- // TODO: https://html.spec.whatwg.org/#cdata-rcdata-restrictions
- var isEscapableRaw = isHTMLEscapableRawTextElement(tagName);
- if (isEscapableRaw || isHTMLRawTextElement(tagName)) {
- var elEndStart = source.indexOf('</' + tagName + '>', elStartEnd);
- var text = source.substring(elStartEnd + 1, elEndStart);
- if (isEscapableRaw) {
- text = text.replace(ENTITY_REG, entityReplacer);
- }
- domBuilder.characters(text, 0, text.length);
- return elEndStart;
- }
- return elStartEnd + 1;
- }
- function _copy(source, target) {
- for (var n in source) {
- if (hasOwn(source, n)) {
- target[n] = source[n];
- }
- }
- }
- /**
- * @typedef ParseUtils
- * @property {function(relativeIndex: number?): string | undefined} char
- * Provides look ahead access to a singe character relative to the current index.
- * @property {function(): number} getIndex
- * Provides read-only access to the current index.
- * @property {function(reg: RegExp): string | null} getMatch
- * Applies the provided regular expression enforcing that it starts at the current index and
- * returns the complete matching string,
- * and moves the current index by the length of the matching string.
- * @property {function(): string} getSource
- * Provides read-only access to the complete source.
- * @property {function(places: number?): void} skip
- * moves the current index by places (defaults to 1)
- * @property {function(): number} skipBlanks
- * Moves the current index by the amount of white space that directly follows the current index
- * and returns the amount of whitespace chars skipped (0..n),
- * or -1 if the end of the source was reached.
- * @property {function(): string} substringFromIndex
- * creates a substring from the current index to the end of `source`
- * @property {function(compareWith: string): boolean} substringStartsWith
- * Checks if `source` contains `compareWith`, starting from the current index.
- * @property {function(compareWith: string): boolean} substringStartsWithCaseInsensitive
- * Checks if `source` contains `compareWith`, starting from the current index,
- * comparing the upper case of both sides.
- * @see {@link parseUtils}
- */
- /**
- * A temporary scope for parsing and look ahead operations in `source`,
- * starting from index `start`.
- *
- * Some operations move the current index by a number of positions,
- * after which `getIndex` returns the new index.
- *
- * @param {string} source
- * @param {number} start
- * @returns {ParseUtils}
- */
- function parseUtils(source, start) {
- var index = start;
- function char(n) {
- n = n || 0;
- return source.charAt(index + n);
- }
- function skip(n) {
- n = n || 1;
- index += n;
- }
- function skipBlanks() {
- var blanks = 0;
- while (index < source.length) {
- var c = char();
- if (c !== ' ' && c !== '\n' && c !== '\t' && c !== '\r') {
- return blanks;
- }
- blanks++;
- skip();
- }
- return -1;
- }
- function substringFromIndex() {
- return source.substring(index);
- }
- function substringStartsWith(text) {
- return source.substring(index, index + text.length) === text;
- }
- function substringStartsWithCaseInsensitive(text) {
- return source.substring(index, index + text.length).toUpperCase() === text.toUpperCase();
- }
- function getMatch(args) {
- var expr = g.reg('^', args);
- var match = expr.exec(substringFromIndex());
- if (match) {
- skip(match[0].length);
- return match[0];
- }
- return null;
- }
- return {
- char: char,
- getIndex: function () {
- return index;
- },
- getMatch: getMatch,
- getSource: function () {
- return source;
- },
- skip: skip,
- skipBlanks: skipBlanks,
- substringFromIndex: substringFromIndex,
- substringStartsWith: substringStartsWith,
- substringStartsWithCaseInsensitive: substringStartsWithCaseInsensitive,
- };
- }
- /**
- * @param {ParseUtils} p
- * @param {DOMHandler} errorHandler
- * @returns {string}
- */
- function parseDoctypeInternalSubset(p, errorHandler) {
- /**
- * @param {ParseUtils} p
- * @param {DOMHandler} errorHandler
- * @returns {string}
- */
- function parsePI(p, errorHandler) {
- var match = g.PI.exec(p.substringFromIndex());
- if (!match) {
- return errorHandler.fatalError('processing instruction is not well-formed at position ' + p.getIndex());
- }
- if (match[1].toLowerCase() === 'xml') {
- return errorHandler.fatalError(
- 'xml declaration is only allowed at the start of the document, but found at position ' + p.getIndex()
- );
- }
- p.skip(match[0].length);
- return match[0];
- }
- // Parse internal subset
- var source = p.getSource();
- if (p.char() === '[') {
- p.skip(1);
- var intSubsetStart = p.getIndex();
- while (p.getIndex() < source.length) {
- p.skipBlanks();
- if (p.char() === ']') {
- var internalSubset = source.substring(intSubsetStart, p.getIndex());
- p.skip(1);
- return internalSubset;
- }
- var current = null;
- // Only in external subset
- // if (char() === '<' && char(1) === '!' && char(2) === '[') {
- // parseConditionalSections(p, errorHandler);
- // } else
- if (p.char() === '<' && p.char(1) === '!') {
- switch (p.char(2)) {
- case 'E': // ELEMENT | ENTITY
- if (p.char(3) === 'L') {
- current = p.getMatch(g.elementdecl);
- } else if (p.char(3) === 'N') {
- current = p.getMatch(g.EntityDecl);
- }
- break;
- case 'A': // ATTRIBUTE
- current = p.getMatch(g.AttlistDecl);
- break;
- case 'N': // NOTATION
- current = p.getMatch(g.NotationDecl);
- break;
- case '-': // COMMENT
- current = p.getMatch(g.Comment);
- break;
- }
- } else if (p.char() === '<' && p.char(1) === '?') {
- current = parsePI(p, errorHandler);
- } else if (p.char() === '%') {
- current = p.getMatch(g.PEReference);
- } else {
- return errorHandler.fatalError('Error detected in Markup declaration');
- }
- if (!current) {
- return errorHandler.fatalError('Error in internal subset at position ' + p.getIndex());
- }
- }
- return errorHandler.fatalError('doctype internal subset is not well-formed, missing ]');
- }
- }
- /**
- * Called when the parser encounters an element starting with '<!'.
- *
- * @param {string} source
- * The xml.
- * @param {number} start
- * the start index of the '<!'
- * @param {DOMHandler} domBuilder
- * @param {DOMHandler} errorHandler
- * @param {boolean} isHTML
- * @returns {number | never}
- * The end index of the element.
- * @throws {ParseError}
- * In case the element is not well-formed.
- */
- function parseDoctypeCommentOrCData(source, start, domBuilder, errorHandler, isHTML) {
- var p = parseUtils(source, start);
- switch (isHTML ? p.char(2).toUpperCase() : p.char(2)) {
- case '-':
- // should be a comment
- var comment = p.getMatch(g.Comment);
- if (comment) {
- domBuilder.comment(comment, g.COMMENT_START.length, comment.length - g.COMMENT_START.length - g.COMMENT_END.length);
- return p.getIndex();
- } else {
- return errorHandler.fatalError('comment is not well-formed at position ' + p.getIndex());
- }
- case '[':
- // should be CDATA
- var cdata = p.getMatch(g.CDSect);
- if (cdata) {
- if (!isHTML && !domBuilder.currentElement) {
- return errorHandler.fatalError('CDATA outside of element');
- }
- domBuilder.startCDATA();
- domBuilder.characters(cdata, g.CDATA_START.length, cdata.length - g.CDATA_START.length - g.CDATA_END.length);
- domBuilder.endCDATA();
- return p.getIndex();
- } else {
- return errorHandler.fatalError('Invalid CDATA starting at position ' + start);
- }
- case 'D': {
- // should be DOCTYPE
- if (domBuilder.doc && domBuilder.doc.documentElement) {
- return errorHandler.fatalError('Doctype not allowed inside or after documentElement at position ' + p.getIndex());
- }
- if (isHTML ? !p.substringStartsWithCaseInsensitive(g.DOCTYPE_DECL_START) : !p.substringStartsWith(g.DOCTYPE_DECL_START)) {
- return errorHandler.fatalError('Expected ' + g.DOCTYPE_DECL_START + ' at position ' + p.getIndex());
- }
- p.skip(g.DOCTYPE_DECL_START.length);
- if (p.skipBlanks() < 1) {
- return errorHandler.fatalError('Expected whitespace after ' + g.DOCTYPE_DECL_START + ' at position ' + p.getIndex());
- }
- var doctype = {
- name: undefined,
- publicId: undefined,
- systemId: undefined,
- internalSubset: undefined,
- };
- // Parse the DOCTYPE name
- doctype.name = p.getMatch(g.Name);
- if (!doctype.name)
- return errorHandler.fatalError('doctype name missing or contains unexpected characters at position ' + p.getIndex());
- if (isHTML && doctype.name.toLowerCase() !== 'html') {
- errorHandler.warning('Unexpected DOCTYPE in HTML document at position ' + p.getIndex());
- }
- p.skipBlanks();
- // Check for ExternalID
- if (p.substringStartsWith(g.PUBLIC) || p.substringStartsWith(g.SYSTEM)) {
- var match = g.ExternalID_match.exec(p.substringFromIndex());
- if (!match) {
- return errorHandler.fatalError('doctype external id is not well-formed at position ' + p.getIndex());
- }
- if (match.groups.SystemLiteralOnly !== undefined) {
- doctype.systemId = match.groups.SystemLiteralOnly;
- } else {
- doctype.systemId = match.groups.SystemLiteral;
- doctype.publicId = match.groups.PubidLiteral;
- }
- p.skip(match[0].length);
- } else if (isHTML && p.substringStartsWithCaseInsensitive(g.SYSTEM)) {
- // https://html.spec.whatwg.org/multipage/syntax.html#doctype-legacy-string
- p.skip(g.SYSTEM.length);
- if (p.skipBlanks() < 1) {
- return errorHandler.fatalError('Expected whitespace after ' + g.SYSTEM + ' at position ' + p.getIndex());
- }
- doctype.systemId = p.getMatch(g.ABOUT_LEGACY_COMPAT_SystemLiteral);
- if (!doctype.systemId) {
- return errorHandler.fatalError(
- 'Expected ' + g.ABOUT_LEGACY_COMPAT + ' in single or double quotes after ' + g.SYSTEM + ' at position ' + p.getIndex()
- );
- }
- }
- if (isHTML && doctype.systemId && !g.ABOUT_LEGACY_COMPAT_SystemLiteral.test(doctype.systemId)) {
- errorHandler.warning('Unexpected doctype.systemId in HTML document at position ' + p.getIndex());
- }
- if (!isHTML) {
- p.skipBlanks();
- doctype.internalSubset = parseDoctypeInternalSubset(p, errorHandler);
- }
- p.skipBlanks();
- if (p.char() !== '>') {
- return errorHandler.fatalError('doctype not terminated with > at position ' + p.getIndex());
- }
- p.skip(1);
- domBuilder.startDTD(doctype.name, doctype.publicId, doctype.systemId, doctype.internalSubset);
- domBuilder.endDTD();
- return p.getIndex();
- }
- default:
- return errorHandler.fatalError('Not well-formed XML starting with "<!" at position ' + start);
- }
- }
- function parseProcessingInstruction(source, start, domBuilder, errorHandler) {
- var match = source.substring(start).match(g.PI);
- if (!match) {
- return errorHandler.fatalError('Invalid processing instruction starting at position ' + start);
- }
- if (match[1].toLowerCase() === 'xml') {
- if (start > 0) {
- return errorHandler.fatalError(
- 'processing instruction at position ' + start + ' is an xml declaration which is only at the start of the document'
- );
- }
- if (!g.XMLDecl.test(source.substring(start))) {
- return errorHandler.fatalError('xml declaration is not well-formed');
- }
- }
- domBuilder.processingInstruction(match[1], match[2]);
- return start + match[0].length;
- }
- function ElementAttributes() {
- this.attributeNames = Object.create(null);
- }
- ElementAttributes.prototype = {
- setTagName: function (tagName) {
- if (!g.QName_exact.test(tagName)) {
- throw new Error('invalid tagName:' + tagName);
- }
- this.tagName = tagName;
- },
- addValue: function (qName, value, offset) {
- if (!g.QName_exact.test(qName)) {
- throw new Error('invalid attribute:' + qName);
- }
- this.attributeNames[qName] = this.length;
- this[this.length++] = { qName: qName, value: value, offset: offset };
- },
- length: 0,
- getLocalName: function (i) {
- return this[i].localName;
- },
- getLocator: function (i) {
- return this[i].locator;
- },
- getQName: function (i) {
- return this[i].qName;
- },
- getURI: function (i) {
- return this[i].uri;
- },
- getValue: function (i) {
- return this[i].value;
- },
- // ,getIndex:function(uri, localName)){
- // if(localName){
- //
- // }else{
- // var qName = uri
- // }
- // },
- // getValue:function(){return this.getValue(this.getIndex.apply(this,arguments))},
- // getType:function(uri,localName){}
- // getType:function(i){},
- };
- exports.XMLReader = XMLReader;
- exports.parseUtils = parseUtils;
- exports.parseDoctypeCommentOrCData = parseDoctypeCommentOrCData;
|