// Strict mode is required by this file: `reg` tells a direct call apart from a call
// made through `regg` by checking `this === undefined`.
'use strict';
/**
 * Detects relevant unicode support for regular expressions in the runtime.
 * Should the runtime not accept the flag `u` or unicode code point escapes,
 * character classes without unicode handling will be used.
 *
 * The probe pattern is handed to the constructor as the ASCII-only escape sequence
 * `\u{1d306}` and the subject is written as a surrogate pair, so this function contains
 * no ES2015-only string syntax: a runtime without support fails inside the `try`
 * (and yields `false`) instead of failing to parse the whole file.
 *
 * @param {typeof RegExp} [RegExpImpl=RegExp]
 * For testing: the RegExp class. Any value that is not a function falls back to `RegExp`.
 * @returns {boolean}
 * `true` if the probe matches U+1D306 as a single code point (two UTF-16 code units),
 * `false` if there is no such match or if creating/executing the expression throws.
 * @see https://node.green/#ES2015-syntax-RegExp--y--and--u--flags
 */
function detectUnicodeSupport(RegExpImpl) {
	var Impl = typeof RegExpImpl === 'function' ? RegExpImpl : RegExp;
	try {
		// eslint-disable-next-line es5/no-unicode-regex,es5/no-unicode-code-point-escape
		var match = new Impl('\\u{1d306}', 'u').exec('\uD834\uDF06');
		return !!match && match[0].length === 2;
	} catch (error) {
		// deliberate best effort: any failure means there is no usable unicode support
	}
	return false;
}
// Probed once when the module is loaded; decides whether the expressions in this file
// are created with the `u` flag and with the `\u{10000}`+ ranges.
var UNICODE_SUPPORT = detectUnicodeSupport();
/**
 * Removes `[`, `]` and any trailing quantifiers from the source of a RegExp:
 * returns everything between the leading `[` and the last `]` of the source.
 *
 * @param {RegExp} regexp
 * An expression whose source starts with `[`.
 * @returns {string}
 * The character list without the surrounding brackets.
 * @throws {Error}
 * If the source of `regexp` does not start with `[`.
 */
function chars(regexp) {
	var source = regexp.source;
	if (source[0] !== '[') {
		throw new Error(regexp + ' can not be used with chars');
	}
	return source.slice(1, source.lastIndexOf(']'));
}
/**
 * Creates a new character list regular expression,
 * by removing the first occurrence of `search` from the source of `regexp`.
 *
 * The flags of `regexp` are not carried over:
 * the result only has the `u` flag, and only if `UNICODE_SUPPORT` is set.
 *
 * @param {RegExp} regexp
 * An expression whose source starts with `[`.
 * @param {string} search
 * The character(s) to remove, a non-empty string.
 * `-` can only be removed when it is the first character of the list.
 * @returns {RegExp}
 * @throws {Error}
 * If the source of `regexp` does not start with `[`, if `search` is not a non-empty string,
 * if `search` is not part of the source or if `-` is not at the first position.
 */
function chars_without(regexp, search) {
	var source = regexp.source;
	if (source[0] !== '[') {
		throw new Error('/' + source + '/ can not be used with chars_without');
	}
	if (!search || typeof search !== 'string') {
		throw new Error(JSON.stringify(search) + ' is not a valid search');
	}
	var position = source.indexOf(search);
	if (position === -1) {
		throw new Error('"' + search + '" is not is /' + source + '/');
	}
	// anywhere else a `-` might be the separator of a range like `a-z`
	if (search === '-' && position !== 1) {
		throw new Error('"' + search + '" is not at the first postion of /' + source + '/');
	}
	return new RegExp(source.replace(search, ''), UNICODE_SUPPORT ? 'u' : '');
}
/**
 * Combines strings and regular expressions into one regular expression:
 * strings are used as they are, of a RegExp only its `source` is used (its flags are dropped).
 *
 * The result always has the `m` flag, plus the `u` flag if `UNICODE_SUPPORT` is set.
 *
 * A direct call is recognized by `this` being `undefined`, which requires strict mode.
 * In that case a part that is exactly `'|'` is rejected,
 * since the alternation would not be limited by a group; `regg` has to be used instead.
 *
 * @param {...(RegExp | string)} args
 * The parts to concatenate, read from `arguments`.
 * @returns {RegExp}
 * @throws {Error}
 * If called directly with a part that is the string `'|'`.
 */
function reg(args) {
	var self = this;
	var parts = Array.prototype.slice.call(arguments);
	var source = parts
		.map(function (part) {
			if (typeof part !== 'string') {
				return part.source;
			}
			if (self === undefined && part === '|') {
				throw new Error('use regg instead of reg to wrap expressions with `|`!');
			}
			return part;
		})
		.join('');
	return new RegExp(source, UNICODE_SUPPORT ? 'mu' : 'm');
}
/**
 * Like `reg` but wraps the expression in `(?:`,`)` to create a non-capturing group.
 * `reg` is invoked with `regg` as `this`, which is why `'|'` is accepted as a part here.
 *
 * @param {...(RegExp | string)} args
 * The parts to concatenate inside the group, read from `arguments`.
 * @returns {RegExp}
 * @throws {Error}
 * If no parameters are provided.
 */
function regg(args) {
	if (arguments.length === 0) {
		throw new Error('no parameters provided');
	}
	var parts = Array.prototype.slice.call(arguments);
	return reg.apply(regg, ['(?:'].concat(parts, [')']));
}
// /**
//  * Append ^ to the beginning of the expression.
//  * @param {...(RegExp | string)[]} args
//  * @returns {RegExp}
//  */
// function reg_start(args) {
// 	if (arguments.length === 0) {
// 		throw new Error('no parameters provided');
// 	}
// 	return reg.apply(reg_start, ['^'].concat(Array.prototype.slice.call(arguments)));
// }

// https://www.w3.org/TR/xml/#document
// `[1] document ::= prolog element Misc*`
// https://www.w3.org/TR/xml11/#NT-document
// `[1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* )`

/**
 * A character usually appearing in wrongly converted strings.
 *
 * @type {string}
 * @see https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character
 * @see https://nodejs.dev/en/api/v18/buffer/#buffers-and-character-encodings
 * @see https://www.unicode.org/faq/utf_bom.html#BOM
 * @readonly
 */
var UNICODE_REPLACEMENT_CHARACTER = '\uFFFD';
// https://www.w3.org/TR/xml/#NT-Char
// any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
// `[2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`
// https://www.w3.org/TR/xml11/#NT-Char
// `[2] Char ::= [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`
// https://www.w3.org/TR/xml11/#NT-RestrictedChar
// `[2a] RestrictedChar ::= [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]`
// https://www.w3.org/TR/xml11/#charsets
// `-` (\x2D) is listed first and left out of the `\x20-\uD7FF` range,
// because `chars_without` can only remove a `-` from the first position.
var Char = /[-\x09\x0A\x0D\x20-\x2C\x2E-\uD7FF\uE000-\uFFFD]/; // without \u{10000}-\u{10FFFF}
if (UNICODE_SUPPORT) {
	// eslint-disable-next-line es5/no-unicode-code-point-escape
	Char = reg('[', chars(Char), '\\u{10000}-\\u{10FFFF}', ']');
}
// the four white space characters of the production `S`, as a character class
var _SChar = /[\x20\x09\x0D\x0A]/;
// the same characters without the brackets, for building other character classes
var SChar_s = chars(_SChar);
// https://www.w3.org/TR/xml11/#NT-S
// `[3] S ::= (#x20 | #x9 | #xD | #xA)+`
var S = reg(_SChar, '+');
// optional whitespace described as `S?` in the grammar,
// simplified to 0-n occurrences of the character class
// instead of 0-1 occurrences of a non-capturing group around S
var S_OPT = reg(_SChar, '*');
// https://www.w3.org/TR/xml11/#NT-NameStartChar
// `[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]`
// U+037E (Greek question mark) and everything above U+EFFFF are not part of the production.
// `:` has to stay in the list, it is removed by `chars_without` for the namespace aware variants.
var NameStartChar =
	/[:_a-zA-Z\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/; // without \u{10000}-\u{EFFFF}
if (UNICODE_SUPPORT) {
	// eslint-disable-next-line es5/no-unicode-code-point-escape
	NameStartChar = reg('[', chars(NameStartChar), '\\u{10000}-\\u{EFFFF}', ']');
}
var NameStartChar_s = chars(NameStartChar);
// https://www.w3.org/TR/xml11/#NT-NameChar
// `[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]`
var NameChar = reg('[', NameStartChar_s, chars(/[-.0-9\xB7]/), chars(/[\u0300-\u036F\u203F-\u2040]/), ']');
// https://www.w3.org/TR/xml11/#NT-Name
// `[5] Name ::= NameStartChar (NameChar)*`
var Name = reg(NameStartChar, NameChar, '*');
/*
https://www.w3.org/TR/xml11/#NT-Names
`[6] Names ::= Name (#x20 Name)*`
*/
// https://www.w3.org/TR/xml11/#NT-Nmtoken
// `[7] Nmtoken ::= (NameChar)+`
var Nmtoken = reg(NameChar, '+');
/*
https://www.w3.org/TR/xml11/#NT-Nmtokens
`[8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*`
var Nmtokens = reg(Nmtoken, regg(/\x20/, Nmtoken), '*');
*/
// https://www.w3.org/TR/xml11/#NT-EntityRef
// `[68] EntityRef ::= '&' Name ';'` [WFC: Entity Declared] [VC: Entity Declared] [WFC: Parsed Entity] [WFC: No Recursion]
var EntityRef = reg('&', Name, ';');
// https://www.w3.org/TR/xml11/#NT-CharRef
// `[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'` [WFC: Legal Character]
var CharRef = regg(/&#[0-9]+;|&#x[0-9a-fA-F]+;/);
/*
https://www.w3.org/TR/xml11/#NT-Reference
- `[67] Reference ::= EntityRef | CharRef`
- `[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'` [WFC: Legal Character]
- `[68] EntityRef ::= '&' Name ';'` [WFC: Entity Declared] [VC: Entity Declared] [WFC: Parsed Entity] [WFC: No Recursion]
*/
var Reference = regg(EntityRef, '|', CharRef);
// https://www.w3.org/TR/xml11/#NT-PEReference
// `[69] PEReference ::= '%' Name ';'`
// [VC: Entity Declared] [WFC: No Recursion] [WFC: In DTD]
var PEReference = reg('%', Name, ';');
// https://www.w3.org/TR/xml11/#NT-EntityValue
// `[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"`
var EntityValue = regg(
	reg('"', regg(/[^%&"]/, '|', PEReference, '|', Reference), '*', '"'),
	'|',
	reg("'", regg(/[^%&']/, '|', PEReference, '|', Reference), '*', "'")
);
// https://www.w3.org/TR/xml11/#NT-AttValue
// `[10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"`
var AttValue = regg('"', regg(/[^<&"]/, '|', Reference), '*', '"', '|', "'", regg(/[^<&']/, '|', Reference), '*', "'");
// https://www.w3.org/TR/xml-names/#ns-decl
// https://www.w3.org/TR/xml-names/#ns-qualnames
// NameStartChar without ":"
var NCNameStartChar = chars_without(NameStartChar, ':');
// https://www.w3.org/TR/xml-names/#orphans
// `[5] NCNameChar ::= NameChar - ':'`
// An XML NameChar, minus the ":"
var NCNameChar = chars_without(NameChar, ':');
// https://www.w3.org/TR/xml-names/#NT-NCName
// `[4] NCName ::= Name - (Char* ':' Char*)`
// An XML Name, minus the ":"
var NCName = reg(NCNameStartChar, NCNameChar, '*');
/**
https://www.w3.org/TR/xml-names/#ns-qualnames
```
[7]  QName ::= PrefixedName | UnprefixedName
           === (NCName ':' NCName) | NCName
           === NCName (':' NCName)?
[8]  PrefixedName ::= Prefix ':' LocalPart
                  === NCName ':' NCName
[9]  UnprefixedName ::= LocalPart
                    === NCName
[10] Prefix ::= NCName
[11] LocalPart ::= NCName
```
*/
var QName = reg(NCName, regg(':', NCName), '?');
// NOTE(review): `reg` always sets the `m` flag, so `^` and `$` also match next to line breaks
// - confirm that callers do not rely on this matching the whole input only.
var QName_exact = reg('^', QName, '$');
// the QName wrapped in a capturing group
var QName_group = reg('(', QName, ')');
// https://www.w3.org/TR/xml11/#NT-SystemLiteral
// `[11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")`
var SystemLiteral = regg(/"[^"]*"|'[^']*'/);
/*
https://www.w3.org/TR/xml11/#NT-PI
```
[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
```
target /xml/i is not excluded!
The first capturing group is the target, the second one the content after the white space.
Since `reg` always sets the `m` flag, the leading `^` also matches at the start of a line.
*/
var PI = reg(/^<\?/, '(', Name, ')', regg(S, '(', Char, '*?)'), '?', /\?>/);
// https://www.w3.org/TR/xml11/#NT-PubidChar
// `[13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]`
// the `-` directly follows the range `0-9`, so it is a literal character and not a range separator
var PubidChar = /[\x20\x0D\x0Aa-zA-Z0-9-'()+,./:=?;!*#@$_%]/;
// https://www.w3.org/TR/xml11/#NT-PubidLiteral
// `[12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"`
var PubidLiteral = regg('"', PubidChar, '*"', '|', "'", chars_without(PubidChar, "'"), "*'");
// https://www.w3.org/TR/xml11/#NT-CharData
// `[14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)`
var COMMENT_START = '<!--';
var COMMENT_END = '-->';
// https://www.w3.org/TR/xml11/#NT-Comment
// `[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'`
var Comment = reg(COMMENT_START, regg(chars_without(Char, '-'), '|', reg('-', chars_without(Char, '-'))), '*', COMMENT_END);
var PCDATA = '#PCDATA';
// https://www.w3.org/TR/xml11/#NT-Mixed
// `[51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'`
// https://www.w3.org/TR/xml-names/#NT-Mixed
// `[51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')'`
// [VC: Proper Group/PE Nesting] [VC: No Duplicate Types]
var Mixed = regg(
	reg(/\(/, S_OPT, PCDATA, regg(S_OPT, /\|/, S_OPT, QName), '*', S_OPT, /\)\*/),
	'|',
	reg(/\(/, S_OPT, PCDATA, S_OPT, /\)/)
);
var _children_quantity = /[?*+]?/;
/*
`[49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'` [VC: Proper Group/PE Nesting]
`[50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'` [VC: Proper Group/PE Nesting]
simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
var _choice_or_seq = reg('[', NameChar_s, SChar_s, chars(_children_quantity), '()|,]*');
```
[48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
        === (Name | '(' S? cp ( S? '|' S? cp )+ S? ')' | '(' S? cp ( S? ',' S? cp )* S? ')') ('?' | '*' | '+')?
        !== (Name | [_choice_or_seq]*) ('?' | '*' | '+')?
```
simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
var cp = reg(regg(Name, '|', _choice_or_seq), _children_quantity);
*/
/*
Inefficient regular expression (High)
This part of the regular expression may cause exponential backtracking on strings starting with '(|' and containing many repetitions of '|'.
https://github.com/xmldom/xmldom/security/code-scanning/91
var choice = regg(/\(/, S_OPT, cp, regg(S_OPT, /\|/, S_OPT, cp), '+', S_OPT, /\)/);
*/
/*
Inefficient regular expression (High)
This part of the regular expression may cause exponential backtracking on strings starting with '(,' and containing many repetitions of ','.
https://github.com/xmldom/xmldom/security/code-scanning/92
var seq = regg(/\(/, S_OPT, cp, regg(S_OPT, /,/, S_OPT, cp), '*', S_OPT, /\)/);
*/
// `[47] children ::= (choice | seq) ('?' | '*' | '+')?`
// simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
var children = reg(/\([^>]+\)/, _children_quantity /*regg(choice, '|', seq), _children_quantity*/);
// https://www.w3.org/TR/xml11/#NT-contentspec
// `[46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children`
var contentspec = regg('EMPTY', '|', 'ANY', '|', Mixed, '|', children);
var ELEMENTDECL_START = '<!ELEMENT';
// https://www.w3.org/TR/xml11/#NT-elementdecl
// `[45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'`
// https://www.w3.org/TR/xml-names/#NT-elementdecl
// `[17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'`
// because of https://www.w3.org/TR/xml11/#NT-PEReference
// since xmldom is not supporting replacements of PEReferences in the DTD
// this also supports PEReference in the possible places
var elementdecl = reg(ELEMENTDECL_START, S, regg(QName, '|', PEReference), S, regg(contentspec, '|', PEReference), S_OPT, '>');
// https://www.w3.org/TR/xml11/#NT-NotationType
// `[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'`
// [VC: Notation Attributes] [VC: One Notation Per Element Type] [VC: No Notation on Empty Element] [VC: No Duplicate Tokens]
var NotationType = reg('NOTATION', S, /\(/, S_OPT, Name, regg(S_OPT, /\|/, S_OPT, Name), '*', S_OPT, /\)/);
// https://www.w3.org/TR/xml11/#NT-Enumeration
// `[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'`
// [VC: Enumeration] [VC: No Duplicate Tokens]
var Enumeration = reg(/\(/, S_OPT, Nmtoken, regg(S_OPT, /\|/, S_OPT, Nmtoken), '*', S_OPT, /\)/);
// https://www.w3.org/TR/xml11/#NT-EnumeratedType
// `[57] EnumeratedType ::= NotationType | Enumeration`
var EnumeratedType = regg(NotationType, '|', Enumeration);
/*
```
[55] StringType ::= 'CDATA'
[56] TokenizedType ::= 'ID' [VC: ID] [VC: One ID per Element Type] [VC: ID Attribute Default]
                     | 'IDREF' [VC: IDREF]
                     | 'IDREFS' [VC: IDREF]
                     | 'ENTITY' [VC: Entity Name]
                     | 'ENTITIES' [VC: Entity Name]
                     | 'NMTOKEN' [VC: Name Token]
                     | 'NMTOKENS' [VC: Name Token]
[54] AttType ::= StringType | TokenizedType | EnumeratedType
```*/
var AttType = regg(/CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS/, '|', EnumeratedType);
// `[60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)`
// [WFC: No < in Attribute Values] [WFC: No External Entity References]
// [VC: Fixed Attribute Default] [VC: Required Attribute] [VC: Attribute Default Value Syntactically Correct]
var DefaultDecl = regg(/#REQUIRED|#IMPLIED/, '|', regg(regg('#FIXED', S), '?', AttValue));
// https://www.w3.org/TR/xml11/#NT-AttDef
// [53] AttDef ::= S Name S AttType S DefaultDecl
// https://www.w3.org/TR/xml-names/#NT-AttDef
// [1] NSAttName ::= PrefixedAttName | DefaultAttName
// [2] PrefixedAttName ::= 'xmlns:' NCName [NSC: Reserved Prefixes and Namespace Names]
// [3] DefaultAttName ::= 'xmlns'
// [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl
//             === S Name S AttType S DefaultDecl
// xmldom is not distinguishing between QName and NSAttName on this level
// to support XML without namespaces in DTD we can not restrict it to QName
var AttDef = regg(S, Name, S, AttType, S, DefaultDecl);
var ATTLIST_DECL_START = '<!ATTLIST';
// https://www.w3.org/TR/xml11/#NT-AttlistDecl
// `[52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'`
// https://www.w3.org/TR/xml-names/#NT-AttlistDecl
// `[20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>'`
// to support XML without namespaces in DTD we can not restrict it to QName
var AttlistDecl = reg(ATTLIST_DECL_START, S, Name, AttDef, '*', S_OPT, '>');
// https://html.spec.whatwg.org/multipage/urls-and-fetching.html#about:legacy-compat
var ABOUT_LEGACY_COMPAT = 'about:legacy-compat';
// the legacy-compat value as a system literal, in double or in single quotes
var ABOUT_LEGACY_COMPAT_SystemLiteral = regg('"' + ABOUT_LEGACY_COMPAT + '"', '|', "'" + ABOUT_LEGACY_COMPAT + "'");
var SYSTEM = 'SYSTEM';
var PUBLIC = 'PUBLIC';
// https://www.w3.org/TR/xml11/#NT-ExternalID
// `[75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral`
var ExternalID = regg(regg(SYSTEM, S, SystemLiteral), '|', regg(PUBLIC, S, PubidLiteral, S, SystemLiteral));
// same structure as `ExternalID`, anchored with `^` and with named capturing groups for the literals
var ExternalID_match = reg(
	'^',
	regg(
		regg(SYSTEM, S, '(?<SystemLiteralOnly>', SystemLiteral, ')'),
		'|',
		regg(PUBLIC, S, '(?<PubidLiteral>', PubidLiteral, ')', S, '(?<SystemLiteral>', SystemLiteral, ')')
	)
);
// https://www.w3.org/TR/xml11/#NT-NDataDecl
// `[76] NDataDecl ::= S 'NDATA' S Name` [VC: Notation Declared]
var NDataDecl = regg(S, 'NDATA', S, Name);
// https://www.w3.org/TR/xml11/#NT-EntityDef
// `[73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)`
var EntityDef = regg(EntityValue, '|', regg(ExternalID, NDataDecl, '?'));
var ENTITY_DECL_START = '<!ENTITY';
// https://www.w3.org/TR/xml11/#NT-GEDecl
// `[71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'`
var GEDecl = reg(ENTITY_DECL_START, S, Name, S, EntityDef, S_OPT, '>');
// https://www.w3.org/TR/xml11/#NT-PEDef
// `[74] PEDef ::= EntityValue | ExternalID`
var PEDef = regg(EntityValue, '|', ExternalID);
// https://www.w3.org/TR/xml11/#NT-PEDecl
// `[72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'`
var PEDecl = reg(ENTITY_DECL_START, S, '%', S, Name, S, PEDef, S_OPT, '>');
// https://www.w3.org/TR/xml11/#NT-EntityDecl
// `[70] EntityDecl ::= GEDecl | PEDecl`
var EntityDecl = regg(GEDecl, '|', PEDecl);
// https://www.w3.org/TR/xml11/#NT-PublicID
// `[83] PublicID ::= 'PUBLIC' S PubidLiteral`
var PublicID = reg(PUBLIC, S, PubidLiteral);
// https://www.w3.org/TR/xml11/#NT-NotationDecl
// `[82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'` [VC: Unique Notation Name]
var NotationDecl = reg('<!NOTATION', S, Name, S, regg(ExternalID, '|', PublicID), S_OPT, '>');
// https://www.w3.org/TR/xml11/#NT-Eq
// `[25] Eq ::= S? '=' S?`
var Eq = reg(S_OPT, '=', S_OPT);
// https://www.w3.org/TR/xml/#NT-VersionNum
// `[26] VersionNum ::= '1.' [0-9]+`
// https://www.w3.org/TR/xml11/#NT-VersionNum
// `[26] VersionNum ::= '1.1'`
// the expression follows the XML 1.0 production, which includes the XML 1.1 value
var VersionNum = /1[.]\d+/;
// https://www.w3.org/TR/xml11/#NT-VersionInfo
// `[24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')`
var VersionInfo = reg(S, 'version', Eq, regg("'", VersionNum, "'", '|', '"', VersionNum, '"'));
// https://www.w3.org/TR/xml11/#NT-EncName
// `[81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*`
var EncName = /[A-Za-z][-A-Za-z0-9._]*/;
// https://www.w3.org/TR/xml11/#NT-EncDecl
// `[80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )`
var EncodingDecl = regg(S, 'encoding', Eq, regg('"', EncName, '"', '|', "'", EncName, "'"));
// https://www.w3.org/TR/xml11/#NT-SDDecl
// `[32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))`
var SDDecl = regg(S, 'standalone', Eq, regg("'", regg('yes', '|', 'no'), "'", '|', '"', regg('yes', '|', 'no'), '"'));
// https://www.w3.org/TR/xml11/#NT-XMLDecl
// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
var XMLDecl = reg(/^<\?xml/, VersionInfo, EncodingDecl, '?', SDDecl, '?', S_OPT, /\?>/);
/*
https://www.w3.org/TR/xml/#NT-markupdecl
https://www.w3.org/TR/xml11/#NT-markupdecl
`[29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment`
var markupdecl = regg(elementdecl, '|', AttlistDecl, '|', EntityDecl, '|', NotationDecl, '|', PI_unsafe, '|', Comment);
*/
/*
https://www.w3.org/TR/xml-names/#NT-doctypedecl
`[28a] DeclSep ::= PEReference | S`
https://www.w3.org/TR/xml11/#NT-intSubset
```
[28b] intSubset ::= (markupdecl | DeclSep)*
                === (markupdecl | PEReference | S)*
```
[WFC: PE Between Declarations]
var intSubset = reg(regg(markupdecl, '|', PEReference, '|', S), '*');
*/
var DOCTYPE_DECL_START = '<!DOCTYPE';
/*
https://www.w3.org/TR/xml11/#NT-doctypedecl
`[28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'`
https://www.w3.org/TR/xml-names/#NT-doctypedecl
`[16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'`
var doctypedecl = reg('<!DOCTYPE', S, Name, regg(S, ExternalID), '?', S_OPT, regg(/\[/, intSubset, /]/, S_OPT), '?', '>');
*/
var CDATA_START = '<![CDATA[';
var CDATA_END = ']]>';
var CDStart = /<!\[CDATA\[/;
var CDEnd = /\]\]>/;
// the shortest run of `Char` up to and including the first `CDEnd`
var CData = reg(Char, '*?', CDEnd);
/*
https://www.w3.org/TR/xml/#dt-cdsection
`[18] CDSect ::= CDStart CData CDEnd`
`[19] CDStart ::= '<![CDATA['`
`[20] CData ::= (Char* - (Char* ']]>' Char*))`
`[21] CDEnd ::= ']]>'`
The expression `CData` above already ends with `CDEnd`, so it is not appended again.
*/
var CDSect = reg(CDStart, CData);
// Public interface of the module.
// unit tested
exports.chars = chars;
exports.chars_without = chars_without;
exports.detectUnicodeSupport = detectUnicodeSupport;
exports.reg = reg;
exports.regg = regg;
// constants and expressions
exports.ABOUT_LEGACY_COMPAT = ABOUT_LEGACY_COMPAT;
exports.ABOUT_LEGACY_COMPAT_SystemLiteral = ABOUT_LEGACY_COMPAT_SystemLiteral;
exports.AttlistDecl = AttlistDecl;
exports.CDATA_START = CDATA_START;
exports.CDATA_END = CDATA_END;
exports.CDSect = CDSect;
exports.Char = Char;
exports.Comment = Comment;
exports.COMMENT_START = COMMENT_START;
exports.COMMENT_END = COMMENT_END;
exports.DOCTYPE_DECL_START = DOCTYPE_DECL_START;
exports.elementdecl = elementdecl;
exports.EntityDecl = EntityDecl;
exports.EntityValue = EntityValue;
exports.ExternalID = ExternalID;
exports.ExternalID_match = ExternalID_match;
exports.Name = Name;
exports.NotationDecl = NotationDecl;
exports.Reference = Reference;
exports.PEReference = PEReference;
exports.PI = PI;
exports.PUBLIC = PUBLIC;
exports.PubidLiteral = PubidLiteral;
exports.QName = QName;
exports.QName_exact = QName_exact;
exports.QName_group = QName_group;
exports.S = S;
exports.SChar_s = SChar_s;
exports.S_OPT = S_OPT;
exports.SYSTEM = SYSTEM;
exports.SystemLiteral = SystemLiteral;
exports.UNICODE_REPLACEMENT_CHARACTER = UNICODE_REPLACEMENT_CHARACTER;
exports.UNICODE_SUPPORT = UNICODE_SUPPORT;
exports.XMLDecl = XMLDecl;