12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992 |
- "use strict";
- Object.defineProperty(exports, "__esModule", { value: true });
- exports.getEncoding = exports.Sniffer = exports.STRINGS = exports.ResultType = void 0;
- var whatwg_encoding_1 = require("whatwg-encoding");
- // https://html.spec.whatwg.org/multipage/syntax.html#prescan-a-byte-stream-to-determine-its-encoding
/**
 * States of the prescan state machine.
 *
 * The result is a two-way lookup table: `State.Begin === 0` and
 * `State[0] === "Begin"`. A name's position in the list below is its
 * numeric value, so the order of the entries must not change.
 */
var State = (function () {
    var orderedNames = [
        // Nothing consumed yet; a BOM, a UTF-16 XML prefix or markup may follow
        "Begin",
        // Part-way through a byte order mark
        "BOM16BE",
        "BOM16LE",
        "BOM8",
        // Part-way through an XML prefix, or right after a leading `<`
        "UTF16LE_XML_PREFIX",
        "BeginLT",
        "UTF16BE_XML_PREFIX",
        // Scanning for the next `<`
        "BeforeTag",
        // Directly after `<`
        "BeforeTagName",
        // Directly after `</`
        "BeforeCloseTagName",
        // Matching the `<!--` that opens a comment
        "CommentStart",
        // Inside a comment, looking for `-->`
        "CommentEnd",
        // Tag name that still matches `meta`
        "TagNameMeta",
        // Tag name that is known not to be `meta`
        "TagNameOther",
        // `<?xml ... encoding="..."`
        "XMLDeclaration",
        "XMLDeclarationBeforeEncoding",
        "XMLDeclarationAfterEncoding",
        "XMLDeclarationBeforeValue",
        "XMLDeclarationValue",
        // Tag-like markup that is none of the above; skipped up to `>`
        "WeirdTag",
        "BeforeAttribute",
        // Attribute names of a meta tag: `http-equiv`, `content`, `charset`
        "MetaAttribHttpEquiv",
        // Value of `http-equiv`; only `content-type` is of interest
        "MetaAttribHttpEquivValue",
        "MetaAttribC",
        "MetaAttribContent",
        "MetaAttribCharset",
        // A known attribute name was matched; checking that it ends here
        "MetaAttribAfterName",
        // Parsing `charset=...` inside a quoted `content` value
        "MetaContentValueQuotedBeforeEncoding",
        "MetaContentValueQuotedAfterEncoding",
        "MetaContentValueQuotedBeforeValue",
        "MetaContentValueQuotedValueQuoted",
        "MetaContentValueQuotedValueUnquoted",
        // Parsing `charset=...` inside an unquoted `content` value
        "MetaContentValueUnquotedBeforeEncoding",
        "MetaContentValueUnquotedBeforeValue",
        "MetaContentValueUnquotedValueQuoted",
        "MetaContentValueUnquotedValueUnquoted",
        "AnyAttribName",
        // After an attribute name, before a possible `=`
        "AfterAttributeName",
        // After `=`
        "BeforeAttributeValue",
        "AttributeValueQuoted",
        "AttributeValueUnquoted",
    ];
    var lookup = {};
    for (var value = 0; value < orderedNames.length; value++) {
        lookup[orderedNames[value]] = value;
        lookup[value] = orderedNames[value];
    }
    return lookup;
})();
/**
 * Where a detected encoding came from.
 *
 * Two-way lookup table (`ResultType.BOM === 0`, `ResultType[0] === "BOM"`).
 * `Sniffer.prototype.setResult` compares these numbers, so the order of the
 * names below is significant.
 */
var ResultType = {};
exports.ResultType = ResultType;
[
    // Byte order mark
    "BOM",
    // User- or transport layer-defined
    "PASSED",
    // XML prefixes
    "XML_PREFIX",
    // Meta tag
    "META_TAG",
    // XML encoding
    "XML_ENCODING",
    // Default
    "DEFAULT",
].forEach(function (name, rank) {
    ResultType[name] = rank;
    ResultType[rank] = name;
});
/**
 * Kind of meta-tag attribute currently being read.
 *
 * Two-way lookup table: `AttribType.None === 0`, `AttribType[0] === "None"`.
 */
var AttribType = (function () {
    var table = {};
    ["None", "HttpEquiv", "Content", "Charset"].forEach(function (name, value) {
        table[name] = value;
        table[value] = name;
    });
    return table;
})();
/**
 * Byte values the sniffer compares against, by name.
 *
 * Two-way lookup table: `Chars.LT === 60`, `Chars[60] === "LT"`.
 */
var Chars = (function () {
    var codes = {
        NIL: 0,
        TAB: 9, // \t
        LF: 10, // \n
        CR: 13, // \r
        SPACE: 32,
        EXCLAMATION: 33, // !
        DQUOTE: 34, // "
        SQUOTE: 39, // '
        DASH: 45, // -
        SLASH: 47, // /
        SEMICOLON: 59, // ;
        LT: 60, // <
        EQUALS: 61, // =
        GT: 62, // >
        QUESTION: 63, // ?
        UpperA: 65,
        UpperZ: 90,
        LowerA: 97,
        LowerZ: 122,
    };
    var table = {};
    Object.keys(codes).forEach(function (name) {
        var code = codes[name];
        table[name] = code;
        table[code] = name;
    });
    return table;
})();
/*
 * Bytes treated as whitespace between tag names, attributes and values.
 * Form feed (0x0c) has no entry in `Chars`, but the WHATWG prescan algorithm
 * treats it like the other four, so it is listed here by value.
 */
var SPACE_CHARACTERS = new Set([
    Chars.SPACE,
    Chars.LF,
    Chars.CR,
    Chars.TAB,
    0x0c,
]);
// Bytes that terminate an unquoted attribute value: any whitespace, or `>`.
var END_OF_UNQUOTED_ATTRIBUTE_VALUE = new Set([
    Chars.SPACE,
    Chars.LF,
    Chars.CR,
    Chars.TAB,
    0x0c,
    Chars.GT,
]);
/**
 * Converts a string into a byte array holding one element per UTF-16 code
 * unit. Code units above 0xff are truncated to their low byte by the typed
 * array, so this is only meaningful for ASCII/Latin-1 input.
 *
 * @param str String to convert.
 * @returns A `Uint8Array` with `str.length` elements.
 */
function toUint8Array(str) {
    return Uint8Array.from({ length: str.length }, function (_unused, position) {
        return str.charCodeAt(position);
    });
}
- exports.STRINGS = {
- UTF8_BOM: new Uint8Array([0xef, 0xbb, 0xbf]),
- UTF16LE_BOM: new Uint8Array([0xff, 0xfe]),
- UTF16BE_BOM: new Uint8Array([0xfe, 0xff]),
- UTF16LE_XML_PREFIX: new Uint8Array([0x3c, 0x0, 0x3f, 0x0, 0x78, 0x0]),
- UTF16BE_XML_PREFIX: new Uint8Array([0x0, 0x3c, 0x0, 0x3f, 0x0, 0x78]),
- XML_DECLARATION: toUint8Array("<?xml"),
- ENCODING: toUint8Array("encoding"),
- META: toUint8Array("meta"),
- HTTP_EQUIV: toUint8Array("http-equiv"),
- CONTENT: toUint8Array("content"),
- CONTENT_TYPE: toUint8Array("content-type"),
- CHARSET: toUint8Array("charset"),
- COMMENT_START: toUint8Array("<!--"),
- COMMENT_END: toUint8Array("-->"),
- };
/**
 * Tells whether a byte is an ASCII letter (`A`-`Z` or `a`-`z`).
 *
 * @param c Byte value to test.
 * @returns `true` for ASCII letters, `false` otherwise.
 */
function isAsciiAlpha(c) {
    // Everything outside `A`..`z` can be rejected right away.
    if (c < Chars.UpperA || c > Chars.LowerZ) {
        return false;
    }
    // Within that span only the gap between `Z` and `a` is not a letter.
    return c <= Chars.UpperZ || c >= Chars.LowerA;
}
/**
 * Tells whether a byte is a single or double quote.
 *
 * @param c Byte value to test.
 * @returns `true` for `"` and `'`, `false` otherwise.
 */
function isQuote(c) {
    switch (c) {
        case Chars.DQUOTE:
        case Chars.SQUOTE: {
            return true;
        }
        default: {
            return false;
        }
    }
}
/**
 * Streaming encoding sniffer: feed it the first bytes of a document through
 * `write()` and read the detected encoding from `encoding` (and its origin
 * from `resultType`).
 *
 * It looks for byte order marks, UTF-16 encoded `<?x` prefixes, an XML
 * declaration with an `encoding` pseudo-attribute, and `<meta>` tags carrying
 * `charset` or `http-equiv="content-type"` + `content="...charset=..."`.
 */
var Sniffer = /** @class */ (function () {
    /**
     * Lowercases an ASCII uppercase letter and returns any other byte as is.
     *
     * A blanket `c | 0x20` also rewrites non-letters: `_` (0x5f) would become
     * 0x7f, which breaks labels such as `Shift_JIS`, and CR (0x0d) would
     * compare equal to `-`.
     */
    function toAsciiLower(c) {
        return c >= Chars.UpperA && c <= Chars.UpperZ ? c | 0x20 : c;
    }
    /**
     * @param _a Optional settings object:
     *  - `maxBytes`: number of bytes to inspect at most (default 1024).
     *  - `userEncoding`: encoding label supplied by the user.
     *  - `transportLayerEncodingLabel`: encoding label from the transport layer.
     *  - `defaultEncoding`: label to report when nothing else is found.
     */
    function Sniffer(_a) {
        var options = _a === void 0 ? {} : _a;
        var maxBytes = options.maxBytes === void 0 ? 1024 : options.maxBytes;
        var userEncoding = options.userEncoding;
        var transportLayerEncodingLabel = options.transportLayerEncodingLabel;
        var defaultEncoding = options.defaultEncoding;
        /** The offset of the previous buffers. */
        this.offset = 0;
        this.state = State.Begin;
        /** Position inside the byte sequence that is currently being matched. */
        this.sectionIndex = 0;
        /** Type of the attribute whose name or value is currently being read. */
        this.attribType = AttribType.None;
        /**
         * Indicates if the `http-equiv` is `content-type`.
         *
         * Initially `null`, a boolean when a value is found.
         */
        this.gotPragma = null;
        /**
         * Encoding found in a `content` attribute before any `http-equiv`
         * was seen; applied once `http-equiv="content-type"` shows up.
         */
        this.needsPragma = null;
        this.inMetaTag = false;
        this.encoding = "windows-1252";
        this.resultType = ResultType.DEFAULT;
        /** Quote byte that opened the current value; `0` for unquoted values. */
        this.quoteCharacter = 0;
        /** Lowercased bytes of the encoding label collected so far. */
        this.attributeValue = [];
        this.maxBytes = maxBytes;
        // The first label that resolves wins; later PASSED calls cannot override it.
        if (userEncoding) {
            this.setResult(userEncoding, ResultType.PASSED);
        }
        if (transportLayerEncodingLabel) {
            this.setResult(transportLayerEncodingLabel, ResultType.PASSED);
        }
        if (defaultEncoding) {
            this.setResult(defaultEncoding, ResultType.DEFAULT);
        }
    }
    /**
     * Records an encoding, unless a result with an equal or lower
     * `ResultType` value (other than `DEFAULT`) is already present, or the
     * label is unknown to `labelToName`.
     *
     * @param label Encoding label as found in the input.
     * @param type Origin of the label.
     */
    Sniffer.prototype.setResult = function (label, type) {
        if (this.resultType === ResultType.DEFAULT || this.resultType > type) {
            var encoding = (0, whatwg_encoding_1.labelToName)(label);
            if (encoding) {
                this.encoding =
                    // Check if we are in a meta tag and the encoding is `x-user-defined`
                    type === ResultType.META_TAG &&
                        encoding === "x-user-defined"
                        ? "windows-1252"
                        : // Check if we are in a meta tag or xml declaration, and the encoding is UTF-16
                            (type === ResultType.META_TAG ||
                                type === ResultType.XML_ENCODING) &&
                                (encoding === "UTF-16LE" || encoding === "UTF-16BE")
                                ? "UTF-8"
                                : encoding;
                this.resultType = type;
            }
        }
    };
    /** First byte of the input: pick between BOMs, XML prefixes and markup. */
    Sniffer.prototype.stateBegin = function (c) {
        switch (c) {
            case exports.STRINGS.UTF16BE_BOM[0]: {
                this.state = State.BOM16BE;
                break;
            }
            case exports.STRINGS.UTF16LE_BOM[0]: {
                this.state = State.BOM16LE;
                break;
            }
            case exports.STRINGS.UTF8_BOM[0]: {
                this.sectionIndex = 1;
                this.state = State.BOM8;
                break;
            }
            case Chars.NIL: {
                this.state = State.UTF16BE_XML_PREFIX;
                this.sectionIndex = 1;
                break;
            }
            case Chars.LT: {
                this.state = State.BeginLT;
                break;
            }
            default: {
                this.state = State.BeforeTag;
            }
        }
    };
    /** Second byte of the input after a leading `<`. */
    Sniffer.prototype.stateBeginLT = function (c) {
        if (c === Chars.NIL) {
            this.state = State.UTF16LE_XML_PREFIX;
            this.sectionIndex = 2;
        }
        else if (c === Chars.QUESTION) {
            this.state = State.XMLDeclaration;
            this.sectionIndex = 2;
        }
        else {
            this.state = State.BeforeTagName;
            this.stateBeforeTagName(c);
        }
    };
    Sniffer.prototype.stateUTF16BE_XML_PREFIX = function (c) {
        // Advance position in the section
        if (this.advanceSection(exports.STRINGS.UTF16BE_XML_PREFIX, c)) {
            if (this.sectionIndex === exports.STRINGS.UTF16BE_XML_PREFIX.length) {
                // We have the whole prefix
                this.setResult("utf-16be", ResultType.XML_PREFIX);
            }
        }
        else {
            this.state = State.BeforeTag;
            this.stateBeforeTag(c);
        }
    };
    Sniffer.prototype.stateUTF16LE_XML_PREFIX = function (c) {
        // Advance position in the section
        if (this.advanceSection(exports.STRINGS.UTF16LE_XML_PREFIX, c)) {
            if (this.sectionIndex === exports.STRINGS.UTF16LE_XML_PREFIX.length) {
                // We have the whole prefix
                this.setResult("utf-16le", ResultType.XML_PREFIX);
            }
        }
        else {
            this.state = State.BeforeTag;
            this.stateBeforeTag(c);
        }
    };
    Sniffer.prototype.stateBOM16LE = function (c) {
        if (c === exports.STRINGS.UTF16LE_BOM[1]) {
            this.setResult("utf-16le", ResultType.BOM);
        }
        else {
            this.state = State.BeforeTag;
            this.stateBeforeTag(c);
        }
    };
    Sniffer.prototype.stateBOM16BE = function (c) {
        if (c === exports.STRINGS.UTF16BE_BOM[1]) {
            this.setResult("utf-16be", ResultType.BOM);
        }
        else {
            this.state = State.BeforeTag;
            this.stateBeforeTag(c);
        }
    };
    Sniffer.prototype.stateBOM8 = function (c) {
        if (this.advanceSection(exports.STRINGS.UTF8_BOM, c)) {
            if (this.sectionIndex === exports.STRINGS.UTF8_BOM.length) {
                this.setResult("utf-8", ResultType.BOM);
            }
        }
        else {
            /*
             * Not a UTF-8 BOM after all (or we are past a complete one).
             * Resume the regular scan so that a document which merely
             * starts with 0xef still gets its meta tags inspected.
             */
            this.state = State.BeforeTag;
            this.stateBeforeTag(c);
        }
    };
    Sniffer.prototype.stateBeforeTag = function (c) {
        if (c === Chars.LT) {
            this.state = State.BeforeTagName;
            this.inMetaTag = false;
        }
    };
    /**
     * We have seen a `<`, and now have to figure out what to do.
     *
     * Options:
     *  - `<meta`
     *  - Any other tag
     *  - A closing tag
     *  - `<!--`
     *  - An XML declaration
     *
     */
    Sniffer.prototype.stateBeforeTagName = function (c) {
        if (isAsciiAlpha(c)) {
            if (toAsciiLower(c) === exports.STRINGS.META[0]) {
                this.sectionIndex = 1;
                this.state = State.TagNameMeta;
            }
            else {
                this.state = State.TagNameOther;
            }
        }
        else
            switch (c) {
                case Chars.SLASH: {
                    this.state = State.BeforeCloseTagName;
                    break;
                }
                case Chars.EXCLAMATION: {
                    this.state = State.CommentStart;
                    this.sectionIndex = 2;
                    break;
                }
                case Chars.QUESTION: {
                    this.state = State.WeirdTag;
                    break;
                }
                default: {
                    this.state = State.BeforeTag;
                    this.stateBeforeTag(c);
                }
            }
    };
    Sniffer.prototype.stateBeforeCloseTagName = function (c) {
        if (isAsciiAlpha(c)) {
            // Switch to `TagNameOther`; the HTML spec allows attributes here as well.
            this.state = State.TagNameOther;
        }
        else {
            this.state = State.WeirdTag;
            // Reconsume, so that the `>` of `</>` ends the section right away.
            this.stateWeirdTag(c);
        }
    };
    Sniffer.prototype.stateCommentStart = function (c) {
        if (this.advanceSection(exports.STRINGS.COMMENT_START, c)) {
            if (this.sectionIndex === exports.STRINGS.COMMENT_START.length) {
                this.state = State.CommentEnd;
                // The -- of the comment start can be part of the end.
                this.sectionIndex = 2;
            }
        }
        else {
            this.state = State.WeirdTag;
            this.stateWeirdTag(c);
        }
    };
    Sniffer.prototype.stateCommentEnd = function (c) {
        if (this.advanceSection(exports.STRINGS.COMMENT_END, c)) {
            if (this.sectionIndex === exports.STRINGS.COMMENT_END.length) {
                this.state = State.BeforeTag;
            }
        }
        else if (c === Chars.DASH) {
            /*
             * If we are here, we know we expected a `>` above.
             * Set this to 2, to support many dashes before the closing `>`.
             */
            this.sectionIndex = 2;
        }
    };
    /**
     * Any section starting with `<!`, `<?`, `</`, without being a closing tag or comment.
     */
    Sniffer.prototype.stateWeirdTag = function (c) {
        if (c === Chars.GT) {
            this.state = State.BeforeTag;
        }
    };
    /**
     * Advances the section, ignoring upper/lower case of ASCII letters.
     *
     * Make sure the section has left-over characters before calling.
     *
     * @returns `false` if we did not match the section.
     */
    Sniffer.prototype.advanceSectionIC = function (section, c) {
        return this.advanceSection(section, toAsciiLower(c));
    };
    /**
     * Advances the section.
     *
     * Make sure the section has left-over characters before calling.
     *
     * @returns `false` if we did not match the section; `sectionIndex` is
     * reset to `0` in that case.
     */
    Sniffer.prototype.advanceSection = function (section, c) {
        if (section[this.sectionIndex] === c) {
            this.sectionIndex++;
            return true;
        }
        this.sectionIndex = 0;
        return false;
    };
    Sniffer.prototype.stateTagNameMeta = function (c) {
        if (this.sectionIndex < exports.STRINGS.META.length) {
            if (this.advanceSectionIC(exports.STRINGS.META, c)) {
                return;
            }
        }
        else if (SPACE_CHARACTERS.has(c)) {
            // `<meta` followed by whitespace: start a fresh meta tag.
            this.inMetaTag = true;
            this.gotPragma = null;
            this.needsPragma = null;
            this.state = State.BeforeAttribute;
            return;
        }
        this.state = State.TagNameOther;
        // Reconsume in case there is a `>`.
        this.stateTagNameOther(c);
    };
    Sniffer.prototype.stateTagNameOther = function (c) {
        if (SPACE_CHARACTERS.has(c)) {
            this.state = State.BeforeAttribute;
        }
        else if (c === Chars.GT) {
            this.state = State.BeforeTag;
        }
    };
    Sniffer.prototype.stateBeforeAttribute = function (c) {
        if (SPACE_CHARACTERS.has(c))
            return;
        /*
         * A new attribute name starts here. Forget the type of the previous
         * attribute, otherwise the value of an unrelated attribute (e.g.
         * `foo=utf-8` following `charset=...`) would be read as that type.
         */
        this.attribType = AttribType.None;
        if (this.inMetaTag) {
            var lower = toAsciiLower(c);
            if (lower === exports.STRINGS.HTTP_EQUIV[0]) {
                this.sectionIndex = 1;
                this.state = State.MetaAttribHttpEquiv;
                return;
            }
            else if (lower === exports.STRINGS.CHARSET[0]) {
                // Could become either `charset` or `content`.
                this.sectionIndex = 1;
                this.state = State.MetaAttribC;
                return;
            }
        }
        this.state =
            c === Chars.SLASH || c === Chars.GT
                ? State.BeforeTag
                : State.AnyAttribName;
    };
    /**
     * Matches the next byte of a known meta attribute name.
     *
     * @param c Current byte.
     * @param section Name that is being matched.
     * @param type Attribute type to record once the whole name has matched.
     */
    Sniffer.prototype.handleMetaAttrib = function (c, section, type) {
        if (this.advanceSectionIC(section, c)) {
            if (this.sectionIndex === section.length) {
                this.attribType = type;
                this.state = State.MetaAttribAfterName;
            }
        }
        else {
            this.state = State.AnyAttribName;
            this.stateAnyAttribName(c);
        }
    };
    Sniffer.prototype.stateMetaAttribHttpEquiv = function (c) {
        this.handleMetaAttrib(c, exports.STRINGS.HTTP_EQUIV, AttribType.HttpEquiv);
    };
    /** After a leading `c`: decide between `charset` and `content`. */
    Sniffer.prototype.stateMetaAttribC = function (c) {
        var lower = toAsciiLower(c);
        if (lower === exports.STRINGS.CHARSET[1]) {
            this.sectionIndex = 2;
            this.state = State.MetaAttribCharset;
        }
        else if (lower === exports.STRINGS.CONTENT[1]) {
            this.sectionIndex = 2;
            this.state = State.MetaAttribContent;
        }
        else {
            this.state = State.AnyAttribName;
            this.stateAnyAttribName(c);
        }
    };
    Sniffer.prototype.stateMetaAttribCharset = function (c) {
        this.handleMetaAttrib(c, exports.STRINGS.CHARSET, AttribType.Charset);
    };
    Sniffer.prototype.stateMetaAttribContent = function (c) {
        this.handleMetaAttrib(c, exports.STRINGS.CONTENT, AttribType.Content);
    };
    Sniffer.prototype.stateMetaAttribAfterName = function (c) {
        if (SPACE_CHARACTERS.has(c) || c === Chars.EQUALS) {
            this.state = State.AfterAttributeName;
            this.stateAfterAttributeName(c);
        }
        else {
            // The name only started with a known one (e.g. `charsetx`).
            this.attribType = AttribType.None;
            this.state = State.AnyAttribName;
            this.stateAnyAttribName(c);
        }
    };
    Sniffer.prototype.stateAnyAttribName = function (c) {
        if (SPACE_CHARACTERS.has(c)) {
            this.attribType = AttribType.None;
            this.state = State.AfterAttributeName;
        }
        else if (c === Chars.SLASH || c === Chars.GT) {
            this.state = State.BeforeTag;
        }
        else if (c === Chars.EQUALS) {
            this.state = State.BeforeAttributeValue;
        }
    };
    Sniffer.prototype.stateAfterAttributeName = function (c) {
        if (SPACE_CHARACTERS.has(c))
            return;
        if (c === Chars.EQUALS) {
            this.state = State.BeforeAttributeValue;
        }
        else {
            this.state = State.BeforeAttribute;
            this.stateBeforeAttribute(c);
        }
    };
    /** After `=`: route to the value parser that fits the attribute type. */
    Sniffer.prototype.stateBeforeAttributeValue = function (c) {
        if (SPACE_CHARACTERS.has(c))
            return;
        this.attributeValue.length = 0;
        this.sectionIndex = 0;
        if (isQuote(c)) {
            this.quoteCharacter = c;
            this.state =
                this.attribType === AttribType.Content
                    ? State.MetaContentValueQuotedBeforeEncoding
                    : this.attribType === AttribType.HttpEquiv
                        ? State.MetaAttribHttpEquivValue
                        : State.AttributeValueQuoted;
        }
        else if (this.attribType === AttribType.Content) {
            this.state = State.MetaContentValueUnquotedBeforeEncoding;
            this.stateMetaContentValueUnquotedBeforeEncoding(c);
        }
        else if (this.attribType === AttribType.HttpEquiv) {
            // We use `quoteCharacter = 0` to signify that the value is unquoted.
            this.quoteCharacter = 0;
            this.sectionIndex = 0;
            this.state = State.MetaAttribHttpEquivValue;
            this.stateMetaAttribHttpEquivValue(c);
        }
        else {
            this.state = State.AttributeValueUnquoted;
            this.stateAttributeValueUnquoted(c);
        }
    };
    // The value has to be `content-type`
    Sniffer.prototype.stateMetaAttribHttpEquivValue = function (c) {
        if (this.sectionIndex === exports.STRINGS.CONTENT_TYPE.length) {
            if (this.quoteCharacter === 0
                ? END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c)
                : c === this.quoteCharacter) {
                if (this.needsPragma !== null) {
                    // A `content` encoding was waiting for this pragma.
                    this.setResult(this.needsPragma, ResultType.META_TAG);
                }
                else if (this.gotPragma === null) {
                    this.gotPragma = true;
                }
                this.state = State.BeforeAttribute;
                return;
            }
        }
        else if (this.advanceSectionIC(exports.STRINGS.CONTENT_TYPE, c)) {
            return;
        }
        // Some other `http-equiv` value; skip the remainder of it.
        this.gotPragma = false;
        if (this.quoteCharacter === 0) {
            this.state = State.AttributeValueUnquoted;
            this.stateAttributeValueUnquoted(c);
        }
        else {
            this.state = State.AttributeValueQuoted;
            this.stateAttributeValueQuoted(c);
        }
    };
    /**
     * Applies the encoding collected from a `content` attribute, or parks it
     * in `needsPragma` until `http-equiv="content-type"` has been seen.
     */
    Sniffer.prototype.handleMetaContentValue = function () {
        if (this.attributeValue.length === 0)
            return;
        var encoding = String.fromCharCode.apply(String, this.attributeValue);
        if (this.gotPragma) {
            this.setResult(encoding, ResultType.META_TAG);
        }
        else if (this.needsPragma === null) {
            // Don't override a previous result.
            this.needsPragma = encoding;
        }
        this.attributeValue.length = 0;
    };
    /** Applies the collected value if the attribute was `charset`. */
    Sniffer.prototype.handleAttributeValue = function () {
        if (this.attribType === AttribType.Charset) {
            this.setResult(String.fromCharCode.apply(String, this.attributeValue), ResultType.META_TAG);
        }
    };
    Sniffer.prototype.stateAttributeValueUnquoted = function (c) {
        if (SPACE_CHARACTERS.has(c)) {
            this.handleAttributeValue();
            this.state = State.BeforeAttribute;
        }
        else if (c === Chars.SLASH || c === Chars.GT) {
            this.handleAttributeValue();
            this.state = State.BeforeTag;
        }
        else if (this.attribType === AttribType.Charset) {
            this.attributeValue.push(toAsciiLower(c));
        }
    };
    /**
     * Searches a `content` value for the word `charset`.
     *
     * @returns `true` once the whole word has been matched.
     */
    Sniffer.prototype.findMetaContentEncoding = function (c) {
        if (this.advanceSectionIC(exports.STRINGS.CHARSET, c)) {
            if (this.sectionIndex === exports.STRINGS.CHARSET.length) {
                return true;
            }
        }
        else {
            // If we encountered another `c` (in either case), assume we started over.
            this.sectionIndex = Number(toAsciiLower(c) === exports.STRINGS.CHARSET[0]);
        }
        return false;
    };
    Sniffer.prototype.stateMetaContentValueUnquotedBeforeEncoding = function (c) {
        if (END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c)) {
            this.stateAttributeValueUnquoted(c);
        }
        else if (this.sectionIndex === exports.STRINGS.CHARSET.length) {
            if (c === Chars.EQUALS) {
                this.state = State.MetaContentValueUnquotedBeforeValue;
            }
        }
        else {
            this.findMetaContentEncoding(c);
        }
    };
    Sniffer.prototype.stateMetaContentValueUnquotedBeforeValue = function (c) {
        if (isQuote(c)) {
            this.quoteCharacter = c;
            this.state = State.MetaContentValueUnquotedValueQuoted;
        }
        else if (END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c)) {
            // Can't have spaces here, as it would no longer be part of the attribute value.
            this.stateAttributeValueUnquoted(c);
        }
        else {
            this.state = State.MetaContentValueUnquotedValueUnquoted;
            this.stateMetaContentValueUnquotedValueUnquoted(c);
        }
    };
    Sniffer.prototype.stateMetaContentValueUnquotedValueQuoted = function (c) {
        if (END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c)) {
            // Quotes weren't matched, so we're done.
            this.stateAttributeValueUnquoted(c);
        }
        else if (c === this.quoteCharacter) {
            this.handleMetaContentValue();
            this.state = State.AttributeValueUnquoted;
        }
        else {
            this.attributeValue.push(toAsciiLower(c));
        }
    };
    Sniffer.prototype.stateMetaContentValueUnquotedValueUnquoted = function (c) {
        if (END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c) || c === Chars.SEMICOLON) {
            this.handleMetaContentValue();
            this.state = State.AttributeValueUnquoted;
            this.stateAttributeValueUnquoted(c);
        }
        else {
            this.attributeValue.push(toAsciiLower(c));
        }
    };
    Sniffer.prototype.stateMetaContentValueQuotedValueUnquoted = function (c) {
        if (isQuote(c) || SPACE_CHARACTERS.has(c) || c === Chars.SEMICOLON) {
            this.handleMetaContentValue();
            // We are done with the value, but might not be at the end of the attribute
            this.state = State.AttributeValueQuoted;
            this.stateAttributeValueQuoted(c);
        }
        else {
            this.attributeValue.push(toAsciiLower(c));
        }
    };
    Sniffer.prototype.stateMetaContentValueQuotedValueQuoted = function (c) {
        if (isQuote(c)) {
            // We have reached the end of our value.
            if (c !== this.quoteCharacter) {
                // Only handle the value if inner quotes were matched.
                this.handleMetaContentValue();
            }
            this.state = State.AttributeValueQuoted;
            this.stateAttributeValueQuoted(c);
        }
        else {
            this.attributeValue.push(toAsciiLower(c));
        }
    };
    Sniffer.prototype.stateMetaContentValueQuotedBeforeEncoding = function (c) {
        if (c === this.quoteCharacter) {
            this.stateAttributeValueQuoted(c);
        }
        else if (this.findMetaContentEncoding(c)) {
            this.state = State.MetaContentValueQuotedAfterEncoding;
        }
    };
    Sniffer.prototype.stateMetaContentValueQuotedAfterEncoding = function (c) {
        if (c === Chars.EQUALS) {
            this.state = State.MetaContentValueQuotedBeforeValue;
        }
        else if (!SPACE_CHARACTERS.has(c)) {
            // Look for the next encoding
            this.state = State.MetaContentValueQuotedBeforeEncoding;
            this.stateMetaContentValueQuotedBeforeEncoding(c);
        }
    };
    Sniffer.prototype.stateMetaContentValueQuotedBeforeValue = function (c) {
        if (c === this.quoteCharacter) {
            this.stateAttributeValueQuoted(c);
        }
        else if (isQuote(c)) {
            this.state = State.MetaContentValueQuotedValueQuoted;
        }
        else if (!SPACE_CHARACTERS.has(c)) {
            this.state = State.MetaContentValueQuotedValueUnquoted;
            this.stateMetaContentValueQuotedValueUnquoted(c);
        }
    };
    Sniffer.prototype.stateAttributeValueQuoted = function (c) {
        if (c === this.quoteCharacter) {
            this.handleAttributeValue();
            this.state = State.BeforeAttribute;
        }
        else if (this.attribType === AttribType.Charset) {
            this.attributeValue.push(toAsciiLower(c));
        }
    };
    // Read STRINGS.XML_DECLARATION
    Sniffer.prototype.stateXMLDeclaration = function (c) {
        if (this.advanceSection(exports.STRINGS.XML_DECLARATION, c)) {
            if (this.sectionIndex === exports.STRINGS.XML_DECLARATION.length) {
                this.sectionIndex = 0;
                this.state = State.XMLDeclarationBeforeEncoding;
            }
        }
        else {
            this.state = State.WeirdTag;
            // Reconsume, so that the `>` of `<?>` ends the section right away.
            this.stateWeirdTag(c);
        }
    };
    Sniffer.prototype.stateXMLDeclarationBeforeEncoding = function (c) {
        if (this.advanceSection(exports.STRINGS.ENCODING, c)) {
            if (this.sectionIndex === exports.STRINGS.ENCODING.length) {
                this.state = State.XMLDeclarationAfterEncoding;
            }
        }
        else if (c === Chars.GT) {
            this.state = State.BeforeTag;
        }
        else {
            // If we encountered another `e`, assume we started over.
            this.sectionIndex = Number(c === exports.STRINGS.ENCODING[0]);
        }
    };
    Sniffer.prototype.stateXMLDeclarationAfterEncoding = function (c) {
        if (c === Chars.EQUALS) {
            this.state = State.XMLDeclarationBeforeValue;
        }
        else if (c > Chars.SPACE) {
            this.state = State.WeirdTag;
            this.stateWeirdTag(c);
        }
    };
    Sniffer.prototype.stateXMLDeclarationBeforeValue = function (c) {
        if (isQuote(c)) {
            this.attributeValue.length = 0;
            this.state = State.XMLDeclarationValue;
        }
        else if (c > Chars.SPACE) {
            this.state = State.WeirdTag;
            this.stateWeirdTag(c);
        }
    };
    Sniffer.prototype.stateXMLDeclarationValue = function (c) {
        if (isQuote(c)) {
            this.setResult(String.fromCharCode.apply(String, this.attributeValue), ResultType.XML_ENCODING);
            this.state = State.WeirdTag;
        }
        else if (c === Chars.GT) {
            this.state = State.BeforeTag;
        }
        else if (c <= Chars.SPACE) {
            // Whitespace or control bytes are not allowed inside the value.
            this.state = State.WeirdTag;
        }
        else {
            this.attributeValue.push(toAsciiLower(c));
        }
    };
    /**
     * Feeds the next chunk of the document into the state machine.
     *
     * Bytes are processed until the chunk ends or until `maxBytes` bytes have
     * been seen across all calls. The outcome is available through
     * `encoding` and `resultType`.
     *
     * @param buffer Chunk of bytes; must offer `length`, index access and
     * `indexOf(value, fromIndex)`.
     */
    Sniffer.prototype.write = function (buffer) {
        var index = 0;
        for (; index < buffer.length && this.offset + index < this.maxBytes; index++) {
            var c = buffer[index];
            switch (this.state) {
                case State.Begin: {
                    this.stateBegin(c);
                    break;
                }
                case State.BOM16BE: {
                    this.stateBOM16BE(c);
                    break;
                }
                case State.BOM16LE: {
                    this.stateBOM16LE(c);
                    break;
                }
                case State.BOM8: {
                    this.stateBOM8(c);
                    break;
                }
                case State.UTF16LE_XML_PREFIX: {
                    this.stateUTF16LE_XML_PREFIX(c);
                    break;
                }
                case State.BeginLT: {
                    this.stateBeginLT(c);
                    break;
                }
                case State.UTF16BE_XML_PREFIX: {
                    this.stateUTF16BE_XML_PREFIX(c);
                    break;
                }
                case State.BeforeTag: {
                    // Optimization: Skip all characters until we find a `<`
                    var idx = buffer.indexOf(Chars.LT, index);
                    if (idx < 0) {
                        // We are done with this buffer. Stay in the state and try on the next one.
                        index = buffer.length;
                    }
                    else {
                        index = idx;
                        this.stateBeforeTag(Chars.LT);
                    }
                    break;
                }
                case State.BeforeTagName: {
                    this.stateBeforeTagName(c);
                    break;
                }
                case State.BeforeCloseTagName: {
                    this.stateBeforeCloseTagName(c);
                    break;
                }
                case State.CommentStart: {
                    this.stateCommentStart(c);
                    break;
                }
                case State.CommentEnd: {
                    this.stateCommentEnd(c);
                    break;
                }
                case State.TagNameMeta: {
                    this.stateTagNameMeta(c);
                    break;
                }
                case State.TagNameOther: {
                    this.stateTagNameOther(c);
                    break;
                }
                case State.XMLDeclaration: {
                    this.stateXMLDeclaration(c);
                    break;
                }
                case State.XMLDeclarationBeforeEncoding: {
                    this.stateXMLDeclarationBeforeEncoding(c);
                    break;
                }
                case State.XMLDeclarationAfterEncoding: {
                    this.stateXMLDeclarationAfterEncoding(c);
                    break;
                }
                case State.XMLDeclarationBeforeValue: {
                    this.stateXMLDeclarationBeforeValue(c);
                    break;
                }
                case State.XMLDeclarationValue: {
                    this.stateXMLDeclarationValue(c);
                    break;
                }
                case State.WeirdTag: {
                    this.stateWeirdTag(c);
                    break;
                }
                case State.BeforeAttribute: {
                    this.stateBeforeAttribute(c);
                    break;
                }
                case State.MetaAttribHttpEquiv: {
                    this.stateMetaAttribHttpEquiv(c);
                    break;
                }
                case State.MetaAttribHttpEquivValue: {
                    this.stateMetaAttribHttpEquivValue(c);
                    break;
                }
                case State.MetaAttribC: {
                    this.stateMetaAttribC(c);
                    break;
                }
                case State.MetaAttribContent: {
                    this.stateMetaAttribContent(c);
                    break;
                }
                case State.MetaAttribCharset: {
                    this.stateMetaAttribCharset(c);
                    break;
                }
                case State.MetaAttribAfterName: {
                    this.stateMetaAttribAfterName(c);
                    break;
                }
                case State.MetaContentValueQuotedBeforeEncoding: {
                    this.stateMetaContentValueQuotedBeforeEncoding(c);
                    break;
                }
                case State.MetaContentValueQuotedAfterEncoding: {
                    this.stateMetaContentValueQuotedAfterEncoding(c);
                    break;
                }
                case State.MetaContentValueQuotedBeforeValue: {
                    this.stateMetaContentValueQuotedBeforeValue(c);
                    break;
                }
                case State.MetaContentValueQuotedValueQuoted: {
                    this.stateMetaContentValueQuotedValueQuoted(c);
                    break;
                }
                case State.MetaContentValueQuotedValueUnquoted: {
                    this.stateMetaContentValueQuotedValueUnquoted(c);
                    break;
                }
                case State.MetaContentValueUnquotedBeforeEncoding: {
                    this.stateMetaContentValueUnquotedBeforeEncoding(c);
                    break;
                }
                case State.MetaContentValueUnquotedBeforeValue: {
                    this.stateMetaContentValueUnquotedBeforeValue(c);
                    break;
                }
                case State.MetaContentValueUnquotedValueQuoted: {
                    this.stateMetaContentValueUnquotedValueQuoted(c);
                    break;
                }
                case State.MetaContentValueUnquotedValueUnquoted: {
                    this.stateMetaContentValueUnquotedValueUnquoted(c);
                    break;
                }
                case State.AnyAttribName: {
                    this.stateAnyAttribName(c);
                    break;
                }
                case State.AfterAttributeName: {
                    this.stateAfterAttributeName(c);
                    break;
                }
                case State.BeforeAttributeValue: {
                    this.stateBeforeAttributeValue(c);
                    break;
                }
                case State.AttributeValueQuoted: {
                    this.stateAttributeValueQuoted(c);
                    break;
                }
                default: {
                    // (State.AttributeValueUnquoted)
                    this.stateAttributeValueUnquoted(c);
                }
            }
        }
        this.offset += index;
    };
    return Sniffer;
}());
- exports.Sniffer = Sniffer;
/**
 * Sniffs the encoding of a single, complete buffer.
 *
 * @param buffer Bytes to inspect.
 * @param options Settings handed to the `Sniffer` constructor unchanged.
 * @returns The `encoding` the sniffer reports after consuming the buffer.
 */
function getEncoding(buffer, options) {
    var detector = new Sniffer(options);
    detector.write(buffer);
    var detected = detector.encoding;
    return detected;
}
- exports.getEncoding = getEncoding;
- //# sourceMappingURL=sniffer.js.map
|