sniffer.js 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. exports.getEncoding = exports.Sniffer = exports.STRINGS = exports.ResultType = void 0;
  4. var whatwg_encoding_1 = require("whatwg-encoding");
  5. // https://html.spec.whatwg.org/multipage/syntax.html#prescan-a-byte-stream-to-determine-its-encoding
  /**
   * States of the prescan state machine.
   *
   * Two-way map: `State.Name` gives the numeric id, `State[id]` gives the name.
   * Ids are assigned in list order, starting at 0.
   */
  var State;
  (function (table) {
    var names = [
      // Before anything starts; can be any of BOM, UTF-16 XML declarations or meta tags
      "Begin",
      // Inside of a BOM
      "BOM16BE",
      "BOM16LE",
      "BOM8",
      // XML prefix
      "UTF16LE_XML_PREFIX",
      "BeginLT",
      "UTF16BE_XML_PREFIX",
      // Waiting for opening `<`
      "BeforeTag",
      // After the opening `<`
      "BeforeTagName",
      // After `</`
      "BeforeCloseTagName",
      // Beginning of a comment
      "CommentStart",
      // End of a comment
      "CommentEnd",
      // A tag name that could be `meta`
      "TagNameMeta",
      // A tag name that is not `meta`
      "TagNameOther",
      // XML declaration
      "XMLDeclaration",
      "XMLDeclarationBeforeEncoding",
      "XMLDeclarationAfterEncoding",
      "XMLDeclarationBeforeValue",
      "XMLDeclarationValue",
      // Anything that looks like a tag, but doesn't fit in the above categories
      "WeirdTag",
      "BeforeAttribute",
      /*
       * Attributes in a meta tag: names are compared against the known set
       * (`http-equiv`, `content`, `charset`), backing out on a mismatch.
       */
      "MetaAttribHttpEquiv",
      // The value has to be `content-type`
      "MetaAttribHttpEquivValue",
      "MetaAttribC",
      "MetaAttribContent",
      "MetaAttribCharset",
      // Waiting for whitespace
      "MetaAttribAfterName",
      "MetaContentValueQuotedBeforeEncoding",
      "MetaContentValueQuotedAfterEncoding",
      "MetaContentValueQuotedBeforeValue",
      "MetaContentValueQuotedValueQuoted",
      "MetaContentValueQuotedValueUnquoted",
      "MetaContentValueUnquotedBeforeEncoding",
      "MetaContentValueUnquotedBeforeValue",
      "MetaContentValueUnquotedValueQuoted",
      "MetaContentValueUnquotedValueUnquoted",
      "AnyAttribName",
      // After the name of an attribute, before the equals sign
      "AfterAttributeName",
      // After `=`
      "BeforeAttributeValue",
      "AttributeValueQuoted",
      "AttributeValueUnquoted",
    ];
    for (var id = 0; id < names.length; id++) {
      table[names[id]] = id;
      table[id] = names[id];
    }
  })(State || (State = {}));
  /**
   * Where a detected encoding came from.
   *
   * Two-way map (name <-> id), also published as `exports.ResultType`.
   * `setResult` compares these ids numerically: a smaller id replaces a larger one.
   */
  var ResultType;
  (function (table) {
    var names = [
      // Byte order mark
      "BOM",
      // User- or transport layer-defined
      "PASSED",
      // XML prefixes
      "XML_PREFIX",
      // Meta tag
      "META_TAG",
      // XML encoding
      "XML_ENCODING",
      // Default
      "DEFAULT",
    ];
    for (var id = 0; id < names.length; id++) {
      table[names[id]] = id;
      table[id] = names[id];
    }
  })(ResultType || (exports.ResultType = ResultType = {}));
  /**
   * Kind of meta attribute currently being read.
   *
   * Two-way map: `AttribType.Name` gives the id, `AttribType[id]` gives the name.
   */
  var AttribType;
  (function (table) {
    ["None", "HttpEquiv", "Content", "Charset"].forEach(function (name, id) {
      table[name] = id;
      table[id] = name;
    });
  })(AttribType || (AttribType = {}));
  /**
   * Byte values the state machine compares against.
   *
   * Two-way map: `Chars.Name` gives the byte value, `Chars[value]` gives the name.
   */
  var Chars;
  (function (table) {
    var entries = [
      ["NIL", 0],
      ["TAB", 9],
      ["LF", 10],
      ["CR", 13],
      ["SPACE", 32],
      ["EXCLAMATION", 33],
      ["DQUOTE", 34],
      ["SQUOTE", 39],
      ["DASH", 45],
      ["SLASH", 47],
      ["SEMICOLON", 59],
      ["LT", 60],
      ["EQUALS", 61],
      ["GT", 62],
      ["QUESTION", 63],
      ["UpperA", 65],
      ["UpperZ", 90],
      ["LowerA", 97],
      ["LowerZ", 122],
    ];
    entries.forEach(function (entry) {
      table[entry[0]] = entry[1];
      table[entry[1]] = entry[0];
    });
  })(Chars || (Chars = {}));
  /*
   * Bytes treated as whitespace by the scanner.
   *
   * Form feed (0x0C) is included because the WHATWG prescan algorithm lists it
   * alongside tab, LF, CR and space. It has no entry in `Chars`, so the byte
   * value is written out here.
   */
  var SPACE_CHARACTERS = new Set([
    Chars.SPACE,
    Chars.LF,
    /* form feed */ 0x0c,
    Chars.CR,
    Chars.TAB,
  ]);
  // Bytes that terminate an unquoted attribute value: whitespace or `>`.
  var END_OF_UNQUOTED_ATTRIBUTE_VALUE = new Set([
    Chars.SPACE,
    Chars.LF,
    /* form feed */ 0x0c,
    Chars.CR,
    Chars.TAB,
    Chars.GT,
  ]);
  /**
   * Converts a string into a byte array, one element per UTF-16 code unit.
   *
   * @param str String to convert.
   * @returns A `Uint8Array` of the same length holding each `charCodeAt` value
   *   (as stored by `Uint8Array`).
   */
  function toUint8Array(str) {
    var bytes = new Uint8Array(str.length);
    var position = str.length;
    while (position-- > 0) {
      bytes[position] = str.charCodeAt(position);
    }
    return bytes;
  }
  129. exports.STRINGS = {
  130. UTF8_BOM: new Uint8Array([0xef, 0xbb, 0xbf]),
  131. UTF16LE_BOM: new Uint8Array([0xff, 0xfe]),
  132. UTF16BE_BOM: new Uint8Array([0xfe, 0xff]),
  133. UTF16LE_XML_PREFIX: new Uint8Array([0x3c, 0x0, 0x3f, 0x0, 0x78, 0x0]),
  134. UTF16BE_XML_PREFIX: new Uint8Array([0x0, 0x3c, 0x0, 0x3f, 0x0, 0x78]),
  135. XML_DECLARATION: toUint8Array("<?xml"),
  136. ENCODING: toUint8Array("encoding"),
  137. META: toUint8Array("meta"),
  138. HTTP_EQUIV: toUint8Array("http-equiv"),
  139. CONTENT: toUint8Array("content"),
  140. CONTENT_TYPE: toUint8Array("content-type"),
  141. CHARSET: toUint8Array("charset"),
  142. COMMENT_START: toUint8Array("<!--"),
  143. COMMENT_END: toUint8Array("-->"),
  144. };
  /**
   * Tells whether a byte is an ASCII letter (`A`-`Z` or `a`-`z`).
   *
   * @param c Byte value to test.
   * @returns `true` for ASCII letters, `false` otherwise.
   */
  function isAsciiAlpha(c) {
    if (c >= Chars.LowerA) {
      return c <= Chars.LowerZ;
    }
    return c >= Chars.UpperA && c <= Chars.UpperZ;
  }
  /**
   * Tells whether a byte is a single or double quote.
   *
   * @param c Byte value to test.
   * @returns `true` for `"` or `'`, `false` otherwise.
   */
  function isQuote(c) {
    switch (c) {
      case Chars.DQUOTE:
      case Chars.SQUOTE:
        return true;
      default:
        return false;
    }
  }
  152. var Sniffer = /** @class */ (function () {
  /**
   * Creates a sniffer in its initial state.
   *
   * @param _a Optional options object (defaults to `{}`):
   *   - `maxBytes`: byte limit, 1024 when left undefined.
   *   - `userEncoding`: label applied first, as a `PASSED` result.
   *   - `transportLayerEncodingLabel`: label applied next, as a `PASSED` result.
   *   - `defaultEncoding`: label applied last, as a `DEFAULT` result.
   */
  function Sniffer(_a) {
    var options = _a === void 0 ? {} : _a;
    var requestedLimit = options.maxBytes;
    var byteLimit = requestedLimit === void 0 ? 1024 : requestedLimit;
    var userLabel = options.userEncoding;
    var transportLabel = options.transportLayerEncodingLabel;
    var fallbackLabel = options.defaultEncoding;
    /** The offset of the previous buffers. */
    this.offset = 0;
    this.state = State.Begin;
    this.sectionIndex = 0;
    this.attribType = AttribType.None;
    /**
     * Whether `http-equiv` was `content-type`.
     *
     * Starts as `null`; becomes a boolean once a value has been seen.
     */
    this.gotPragma = null;
    this.needsPragma = null;
    this.inMetaTag = false;
    // Result reported until something else is accepted by `setResult`.
    this.encoding = "windows-1252";
    this.resultType = ResultType.DEFAULT;
    this.quoteCharacter = 0;
    this.attributeValue = [];
    this.maxBytes = byteLimit;
    if (userLabel) {
      this.setResult(userLabel, ResultType.PASSED);
    }
    if (transportLabel) {
      this.setResult(transportLabel, ResultType.PASSED);
    }
    if (fallbackLabel) {
      this.setResult(fallbackLabel, ResultType.DEFAULT);
    }
  }
  /**
   * Records an encoding if it may replace the current result.
   *
   * The result is replaced when the current one is `DEFAULT`, or when `type`
   * has a smaller id than the current result type. Labels that `labelToName`
   * does not resolve are ignored.
   *
   * @param label Encoding label to resolve.
   * @param type `ResultType` describing where the label came from.
   */
  Sniffer.prototype.setResult = function (label, type) {
    var mayReplace = this.resultType === ResultType.DEFAULT || this.resultType > type;
    if (!mayReplace) {
      return;
    }
    var name = (0, whatwg_encoding_1.labelToName)(label);
    if (!name) {
      return;
    }
    var fromMeta = type === ResultType.META_TAG;
    var isUtf16 = name === "UTF-16LE" || name === "UTF-16BE";
    if (fromMeta && name === "x-user-defined") {
      // A meta tag declaring `x-user-defined` is reported as windows-1252.
      this.encoding = "windows-1252";
    }
    else if ((fromMeta || type === ResultType.XML_ENCODING) && isUtf16) {
      // UTF-16 declared by a meta tag or XML declaration is reported as UTF-8.
      this.encoding = "UTF-8";
    }
    else {
      this.encoding = name;
    }
    this.resultType = type;
  };
  /**
   * Handles the very first byte: picks a BOM, UTF-16 XML prefix or `<` path,
   * or falls back to waiting for a tag.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateBegin = function (c) {
    var strings = exports.STRINGS;
    if (c === strings.UTF16BE_BOM[0]) {
      this.state = State.BOM16BE;
    }
    else if (c === strings.UTF16LE_BOM[0]) {
      this.state = State.BOM16LE;
    }
    else if (c === strings.UTF8_BOM[0]) {
      this.sectionIndex = 1;
      this.state = State.BOM8;
    }
    else if (c === Chars.NIL) {
      // First byte of the UTF-16BE XML prefix has been consumed.
      this.state = State.UTF16BE_XML_PREFIX;
      this.sectionIndex = 1;
    }
    else if (c === Chars.LT) {
      this.state = State.BeginLT;
    }
    else {
      this.state = State.BeforeTag;
    }
  };
  /**
   * Handles the byte after a leading `<`: a NUL continues the UTF-16LE XML
   * prefix, `?` continues an XML declaration, anything else is a tag name.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateBeginLT = function (c) {
    switch (c) {
      case Chars.NIL:
        this.state = State.UTF16LE_XML_PREFIX;
        this.sectionIndex = 2;
        return;
      case Chars.QUESTION:
        this.state = State.XMLDeclaration;
        this.sectionIndex = 2;
        return;
      default:
        this.state = State.BeforeTagName;
        this.stateBeforeTagName(c);
    }
  };
  /**
   * Matches the UTF-16BE XML prefix byte by byte; on a mismatch the byte is
   * reprocessed in `BeforeTag`.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateUTF16BE_XML_PREFIX = function (c) {
    var prefix = exports.STRINGS.UTF16BE_XML_PREFIX;
    if (!this.advanceSection(prefix, c)) {
      this.state = State.BeforeTag;
      this.stateBeforeTag(c);
      return;
    }
    if (this.sectionIndex === prefix.length) {
      // The whole prefix has been seen.
      this.setResult("utf-16be", ResultType.XML_PREFIX);
    }
  };
  /**
   * Matches the UTF-16LE XML prefix byte by byte; on a mismatch the byte is
   * reprocessed in `BeforeTag`.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateUTF16LE_XML_PREFIX = function (c) {
    var prefix = exports.STRINGS.UTF16LE_XML_PREFIX;
    if (!this.advanceSection(prefix, c)) {
      this.state = State.BeforeTag;
      this.stateBeforeTag(c);
      return;
    }
    if (this.sectionIndex === prefix.length) {
      // The whole prefix has been seen.
      this.setResult("utf-16le", ResultType.XML_PREFIX);
    }
  };
  /**
   * Checks the second byte of a UTF-16LE BOM; any other byte is reprocessed
   * in `BeforeTag`.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateBOM16LE = function (c) {
    var completesBom = c === exports.STRINGS.UTF16LE_BOM[1];
    if (!completesBom) {
      this.state = State.BeforeTag;
      this.stateBeforeTag(c);
      return;
    }
    this.setResult("utf-16le", ResultType.BOM);
  };
  /**
   * Checks the second byte of a UTF-16BE BOM; any other byte is reprocessed
   * in `BeforeTag`.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateBOM16BE = function (c) {
    var completesBom = c === exports.STRINGS.UTF16BE_BOM[1];
    if (!completesBom) {
      this.state = State.BeforeTag;
      this.stateBeforeTag(c);
      return;
    }
    this.setResult("utf-16be", ResultType.BOM);
  };
  /**
   * Matches the remaining bytes of a UTF-8 BOM.
   *
   * When a byte does not continue the BOM (or the BOM is already complete),
   * the scanner leaves this state and reprocesses the byte in `BeforeTag`,
   * as the UTF-16 BOM states do. Without that hand-off, a document that merely
   * starts with 0xEF would stay here for good: no tags would be scanned, and
   * a later `EF BB BF` sequence would be reported as a BOM.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateBOM8 = function (c) {
    var bom = exports.STRINGS.UTF8_BOM;
    if (this.sectionIndex < bom.length && this.advanceSection(bom, c)) {
      if (this.sectionIndex === bom.length) {
        this.setResult("utf-8", ResultType.BOM);
      }
      return;
    }
    this.state = State.BeforeTag;
    this.stateBeforeTag(c);
  };
  /**
   * Waits for a `<`; when one arrives, moves to `BeforeTagName` and clears
   * the meta-tag flag.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateBeforeTag = function (c) {
    if (c !== Chars.LT) {
      return;
    }
    this.state = State.BeforeTagName;
    this.inMetaTag = false;
  };
  /**
   * Decides what follows a `<`.
   *
   * - A letter matching the first letter of `meta` (any case): possible meta tag.
   * - Any other letter: some other tag.
   * - `/`: a closing tag.
   * - `!`: possibly a comment (`<!--`).
   * - `?`: skipped as a weird tag.
   * - Anything else: not a tag; the byte is reprocessed in `BeforeTag`.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateBeforeTagName = function (c) {
    if (isAsciiAlpha(c)) {
      var startsLikeMeta = (c | 0x20) === exports.STRINGS.META[0];
      if (startsLikeMeta) {
        this.sectionIndex = 1;
      }
      this.state = startsLikeMeta ? State.TagNameMeta : State.TagNameOther;
      return;
    }
    if (c === Chars.SLASH) {
      this.state = State.BeforeCloseTagName;
    }
    else if (c === Chars.EXCLAMATION) {
      // `<!` has been consumed; continue matching `<!--` from its third byte.
      this.state = State.CommentStart;
      this.sectionIndex = 2;
    }
    else if (c === Chars.QUESTION) {
      this.state = State.WeirdTag;
    }
    else {
      this.state = State.BeforeTag;
      this.stateBeforeTag(c);
    }
  };
  /**
   * Handles the byte after `</`.
   *
   * A letter starts a closing tag, handled like any other tag name (the HTML
   * spec allows attributes there as well). Any other byte makes this a weird
   * tag. That byte is reprocessed in `WeirdTag` so that the `>` of `</>`
   * closes the construct instead of being swallowed.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateBeforeCloseTagName = function (c) {
    if (isAsciiAlpha(c)) {
      this.state = State.TagNameOther;
      return;
    }
    this.state = State.WeirdTag;
    this.stateWeirdTag(c);
  };
  /**
   * Matches the rest of `<!--`; a mismatch turns the construct into a weird
   * tag and reprocesses the byte there.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateCommentStart = function (c) {
    var opener = exports.STRINGS.COMMENT_START;
    if (!this.advanceSection(opener, c)) {
      this.state = State.WeirdTag;
      this.stateWeirdTag(c);
      return;
    }
    if (this.sectionIndex === opener.length) {
      this.state = State.CommentEnd;
      // The `--` of the comment start can double as the `--` of the end.
      this.sectionIndex = 2;
    }
  };
  /**
   * Looks for the `-->` that closes a comment.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateCommentEnd = function (c) {
    var terminator = exports.STRINGS.COMMENT_END;
    if (!this.advanceSection(terminator, c)) {
      if (c === Chars.DASH) {
        /*
         * A dash that failed to match means a `>` was expected, so two
         * dashes were already seen. Keep the index at 2 so that any number
         * of dashes may precede the closing `>`.
         */
        this.sectionIndex = 2;
      }
      return;
    }
    if (this.sectionIndex === terminator.length) {
      this.state = State.BeforeTag;
    }
  };
  /**
   * Skips a section starting with `<!`, `<?` or `</` that is neither a
   * closing tag nor a comment; the section ends at the next `>`.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateWeirdTag = function (c) {
    if (c !== Chars.GT) {
      return;
    }
    this.state = State.BeforeTag;
  };
  /**
   * Case-insensitive variant of `advanceSection`: the byte has bit 0x20 set
   * before it is compared.
   *
   * The section must still have bytes left when this is called.
   *
   * @param section Byte sequence being matched.
   * @param c Current byte.
   * @returns `false` if the byte did not match the section.
   */
  Sniffer.prototype.advanceSectionIC = function (section, c) {
    var folded = c | 0x20;
    return this.advanceSection(section, folded);
  };
  /**
   * Compares the byte with the section at `sectionIndex`. A match moves the
   * index forward by one; a mismatch resets it to 0.
   *
   * The section must still have bytes left when this is called.
   *
   * @param section Byte sequence being matched.
   * @param c Current byte.
   * @returns `false` if the byte did not match the section.
   */
  Sniffer.prototype.advanceSection = function (section, c) {
    var matched = section[this.sectionIndex] === c;
    this.sectionIndex = matched ? this.sectionIndex + 1 : 0;
    return matched;
  };
  /**
   * Matches a tag name against `meta` (case-insensitively).
   *
   * Once the name is complete, a whitespace byte enters the meta tag and
   * clears the pragma bookkeeping. Every other outcome treats the tag as a
   * non-meta tag and reprocesses the byte there.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateTagNameMeta = function (c) {
    var name = exports.STRINGS.META;
    var nameIncomplete = this.sectionIndex < name.length;
    if (nameIncomplete) {
      if (this.advanceSectionIC(name, c)) {
        return;
      }
    }
    else if (SPACE_CHARACTERS.has(c)) {
      this.inMetaTag = true;
      this.gotPragma = null;
      this.needsPragma = null;
      this.state = State.BeforeAttribute;
      return;
    }
    // Reconsume as another tag name, in case the byte is a `>`.
    this.state = State.TagNameOther;
    this.stateTagNameOther(c);
  };
  /**
   * Skips over a non-meta tag name: whitespace leads to the attributes,
   * `>` ends the tag, other bytes are ignored.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateTagNameOther = function (c) {
    if (SPACE_CHARACTERS.has(c)) {
      this.state = State.BeforeAttribute;
      return;
    }
    if (c === Chars.GT) {
      this.state = State.BeforeTag;
    }
  };
  /**
   * Handles the first non-whitespace byte of an attribute.
   *
   * Inside a meta tag, a byte matching the first letter of `http-equiv` or
   * `charset` (which is also the first letter of `content`) starts a name
   * match. `/` and `>` end the tag; any other byte starts an ordinary
   * attribute name.
   *
   * The attribute type is cleared here because a new attribute is starting.
   * Otherwise the type left by an earlier `charset`, `content` or
   * `http-equiv` attribute would still be set when a later, unrelated
   * attribute's value is read (even in a later non-meta tag), and that value
   * would be interpreted as an encoding label.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateBeforeAttribute = function (c) {
    if (SPACE_CHARACTERS.has(c)) {
      return;
    }
    this.attribType = AttribType.None;
    if (this.inMetaTag) {
      var lower = c | 0x20;
      if (lower === exports.STRINGS.HTTP_EQUIV[0]) {
        this.sectionIndex = 1;
        this.state = State.MetaAttribHttpEquiv;
        return;
      }
      else if (lower === exports.STRINGS.CHARSET[0]) {
        this.sectionIndex = 1;
        this.state = State.MetaAttribC;
        return;
      }
    }
    this.state =
      c === Chars.SLASH || c === Chars.GT
        ? State.BeforeTag
        : State.AnyAttribName;
  };
  /**
   * Shared matcher for meta attribute names.
   *
   * A matching byte advances the section; once the whole name is matched the
   * attribute type is recorded and the scanner moves to `MetaAttribAfterName`.
   * A mismatch falls back to a generic attribute name and reprocesses the byte.
   *
   * @param c Current byte.
   * @param section Attribute name being matched.
   * @param type `AttribType` to record on a full match.
   */
  Sniffer.prototype.handleMetaAttrib = function (c, section, type) {
    if (!this.advanceSectionIC(section, c)) {
      this.state = State.AnyAttribName;
      this.stateAnyAttribName(c);
      return;
    }
    if (this.sectionIndex === section.length) {
      this.attribType = type;
      this.state = State.MetaAttribAfterName;
    }
  };
  /**
   * Continues matching the attribute name `http-equiv`.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateMetaAttribHttpEquiv = function (c) {
    var attributeName = exports.STRINGS.HTTP_EQUIV;
    this.handleMetaAttrib(c, attributeName, AttribType.HttpEquiv);
  };
  /**
   * Handles the second byte of an attribute name that started with `c`:
   * picks between `charset` and `content`, or falls back to a generic name.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateMetaAttribC = function (c) {
    var second = c | 0x20;
    switch (second) {
      case exports.STRINGS.CHARSET[1]:
        this.sectionIndex = 2;
        this.state = State.MetaAttribCharset;
        break;
      case exports.STRINGS.CONTENT[1]:
        this.sectionIndex = 2;
        this.state = State.MetaAttribContent;
        break;
      default:
        this.state = State.AnyAttribName;
        this.stateAnyAttribName(c);
    }
  };
  /**
   * Continues matching the attribute name `charset`.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateMetaAttribCharset = function (c) {
    var attributeName = exports.STRINGS.CHARSET;
    this.handleMetaAttrib(c, attributeName, AttribType.Charset);
  };
  /**
   * Continues matching the attribute name `content`.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateMetaAttribContent = function (c) {
    var attributeName = exports.STRINGS.CONTENT;
    this.handleMetaAttrib(c, attributeName, AttribType.Content);
  };
  /**
   * Handles the byte right after a fully matched meta attribute name.
   *
   * Whitespace or `=` confirms the name. Any other byte means the name is
   * longer than the one matched (e.g. `charsetx`), so the recorded attribute
   * type is cleared before falling back to a generic attribute name.
   * Otherwise the value of that longer-named attribute would be read as if
   * it belonged to the matched name.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateMetaAttribAfterName = function (c) {
    if (SPACE_CHARACTERS.has(c) || c === Chars.EQUALS) {
      this.state = State.AfterAttributeName;
      this.stateAfterAttributeName(c);
    }
    else {
      this.attribType = AttribType.None;
      this.state = State.AnyAttribName;
      this.stateAnyAttribName(c);
    }
  };
  /**
   * Skips over an attribute name that is not one of the tracked meta
   * attributes.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateAnyAttribName = function (c) {
    if (SPACE_CHARACTERS.has(c)) {
      this.attribType = AttribType.None;
      this.state = State.AfterAttributeName;
      return;
    }
    switch (c) {
      case Chars.SLASH:
      case Chars.GT:
        this.state = State.BeforeTag;
        break;
      case Chars.EQUALS:
        this.state = State.BeforeAttributeValue;
        break;
    }
  };
  /**
   * After an attribute name: skips whitespace, then expects `=`. Any other
   * byte is reprocessed in `BeforeAttribute`.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateAfterAttributeName = function (c) {
    if (SPACE_CHARACTERS.has(c)) {
      return;
    }
    if (c !== Chars.EQUALS) {
      this.state = State.BeforeAttribute;
      this.stateBeforeAttribute(c);
      return;
    }
    this.state = State.BeforeAttributeValue;
  };
  /**
   * After `=`: skips whitespace, clears the collected value and the section
   * index, then picks the value state from the attribute type and from
   * whether the value opens with a quote. Unquoted values reprocess the byte
   * in the chosen state.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateBeforeAttributeValue = function (c) {
    if (SPACE_CHARACTERS.has(c)) {
      return;
    }
    this.attributeValue.length = 0;
    this.sectionIndex = 0;
    var quoted = isQuote(c);
    if (quoted) {
      this.quoteCharacter = c;
    }
    switch (this.attribType) {
      case AttribType.Content:
        if (quoted) {
          this.state = State.MetaContentValueQuotedBeforeEncoding;
        }
        else {
          this.state = State.MetaContentValueUnquotedBeforeEncoding;
          this.stateMetaContentValueUnquotedBeforeEncoding(c);
        }
        break;
      case AttribType.HttpEquiv:
        if (quoted) {
          this.state = State.MetaAttribHttpEquivValue;
        }
        else {
          // `quoteCharacter = 0` signifies that the value is unquoted.
          this.quoteCharacter = 0;
          this.sectionIndex = 0;
          this.state = State.MetaAttribHttpEquivValue;
          this.stateMetaAttribHttpEquivValue(c);
        }
        break;
      default:
        if (quoted) {
          this.state = State.AttributeValueQuoted;
        }
        else {
          this.state = State.AttributeValueUnquoted;
          this.stateAttributeValueUnquoted(c);
        }
    }
  };
  /**
   * Reads the value of `http-equiv`, which has to be `content-type`.
   *
   * On a complete match followed by the end of the value, a charset saved in
   * `needsPragma` is applied; if there is none, `gotPragma` becomes `true`
   * (only when it was still `null`). Any deviation sets `gotPragma` to
   * `false` and hands the byte to the generic value state.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateMetaAttribHttpEquivValue = function (c) {
    var expected = exports.STRINGS.CONTENT_TYPE;
    var unquoted = this.quoteCharacter === 0;
    if (this.sectionIndex !== expected.length) {
      if (this.advanceSectionIC(expected, c)) {
        return;
      }
    }
    else {
      var atValueEnd = unquoted
        ? END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c)
        : c === this.quoteCharacter;
      if (atValueEnd) {
        if (this.needsPragma !== null) {
          this.setResult(this.needsPragma, ResultType.META_TAG);
        }
        else if (this.gotPragma === null) {
          this.gotPragma = true;
        }
        this.state = State.BeforeAttribute;
        return;
      }
    }
    // The value is not `content-type`.
    this.gotPragma = false;
    if (unquoted) {
      this.state = State.AttributeValueUnquoted;
      this.stateAttributeValueUnquoted(c);
    }
    else {
      this.state = State.AttributeValueQuoted;
      this.stateAttributeValueQuoted(c);
    }
  };
  /**
   * Consumes the charset label collected from a `content` attribute.
   *
   * Does nothing when no bytes were collected. With the pragma already seen,
   * the label is applied as a meta-tag result. Otherwise it is stored in
   * `needsPragma`, unless a label is already stored there. The collected
   * bytes are cleared afterwards.
   */
  Sniffer.prototype.handleMetaContentValue = function () {
    var collected = this.attributeValue;
    if (collected.length === 0) {
      return;
    }
    var label = String.fromCharCode.apply(String, collected);
    if (this.gotPragma) {
      this.setResult(label, ResultType.META_TAG);
    }
    else if (this.needsPragma === null) {
      // Don't override a previously stored label.
      this.needsPragma = label;
    }
    collected.length = 0;
  };
  /**
   * At the end of an attribute value: if the attribute is `charset`, applies
   * the collected bytes as a meta-tag result.
   */
  Sniffer.prototype.handleAttributeValue = function () {
    if (this.attribType !== AttribType.Charset) {
      return;
    }
    var label = String.fromCharCode.apply(String, this.attributeValue);
    this.setResult(label, ResultType.META_TAG);
  };
  /**
   * Reads an unquoted attribute value. Whitespace ends the value; `/` or `>`
   * ends the value and the tag. Other bytes are collected (with bit 0x20 set)
   * only for the `charset` attribute.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateAttributeValueUnquoted = function (c) {
    var endsTag = c === Chars.SLASH || c === Chars.GT;
    if (SPACE_CHARACTERS.has(c) || endsTag) {
      this.handleAttributeValue();
      this.state = endsTag ? State.BeforeTag : State.BeforeAttribute;
      return;
    }
    if (this.attribType === AttribType.Charset) {
      this.attributeValue.push(c | 0x20);
    }
  };
  /**
   * Searches a `content` value for the word `charset`, case-insensitively.
   *
   * @param c Current byte.
   * @returns `true` once the byte completes `charset`, `false` otherwise.
   */
  Sniffer.prototype.findMetaContentEncoding = function (c) {
    if (this.advanceSectionIC(exports.STRINGS.CHARSET, c)) {
      if (this.sectionIndex === exports.STRINGS.CHARSET.length) {
        return true;
      }
    }
    else {
      /*
       * The mismatching byte may itself start the word, so restart from it.
       * The comparison sets bit 0x20 first, like the rest of the matching:
       * an uppercase `C` must restart the match just as a lowercase one does.
       */
      this.sectionIndex = Number((c | 0x20) === exports.STRINGS.CHARSET[0]);
    }
    return false;
  };
  /**
   * Unquoted `content` value, before `charset=` has been found.
   *
   * A byte that ends an unquoted value is handed to the generic unquoted
   * handler. Once `charset` is fully matched, only `=` moves on; until then
   * the search for `charset` continues.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateMetaContentValueUnquotedBeforeEncoding = function (c) {
    if (END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c)) {
      this.stateAttributeValueUnquoted(c);
      return;
    }
    var wordComplete = this.sectionIndex === exports.STRINGS.CHARSET.length;
    if (!wordComplete) {
      this.findMetaContentEncoding(c);
      return;
    }
    if (c === Chars.EQUALS) {
      this.state = State.MetaContentValueUnquotedBeforeValue;
    }
  };
  /**
   * Unquoted `content` value, right after `charset=`: decides whether the
   * charset label is itself quoted.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateMetaContentValueUnquotedBeforeValue = function (c) {
    if (isQuote(c)) {
      this.quoteCharacter = c;
      this.state = State.MetaContentValueUnquotedValueQuoted;
      return;
    }
    if (END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c)) {
      // Spaces cannot appear here; they would end the attribute value.
      this.stateAttributeValueUnquoted(c);
      return;
    }
    this.state = State.MetaContentValueUnquotedValueUnquoted;
    this.stateMetaContentValueUnquotedValueUnquoted(c);
  };
  /**
   * Collects a quoted charset label inside an unquoted `content` value.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateMetaContentValueUnquotedValueQuoted = function (c) {
    if (END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c)) {
      // The attribute ended before the quote was closed; nothing to record.
      this.stateAttributeValueUnquoted(c);
      return;
    }
    if (c !== this.quoteCharacter) {
      this.attributeValue.push(c | 0x20);
      return;
    }
    this.handleMetaContentValue();
    this.state = State.AttributeValueUnquoted;
  };
  /**
   * Collects an unquoted charset label inside an unquoted `content` value.
   * The label ends at `;` or at the end of the attribute value.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateMetaContentValueUnquotedValueUnquoted = function (c) {
    var endsLabel = END_OF_UNQUOTED_ATTRIBUTE_VALUE.has(c) || c === Chars.SEMICOLON;
    if (!endsLabel) {
      this.attributeValue.push(c | 0x20);
      return;
    }
    this.handleMetaContentValue();
    this.state = State.AttributeValueUnquoted;
    this.stateAttributeValueUnquoted(c);
  };
  /**
   * Collects an unquoted charset label inside a quoted `content` value.
   * The label ends at a quote, whitespace or `;`.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateMetaContentValueQuotedValueUnquoted = function (c) {
    var endsLabel = isQuote(c) || SPACE_CHARACTERS.has(c) || c === Chars.SEMICOLON;
    if (!endsLabel) {
      this.attributeValue.push(c | 0x20);
      return;
    }
    this.handleMetaContentValue();
    // The label is done, but the attribute value may continue.
    this.state = State.AttributeValueQuoted;
    this.stateAttributeValueQuoted(c);
  };
  /**
   * Collects a quoted charset label inside a quoted `content` value.
   * Any quote ends the label.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateMetaContentValueQuotedValueQuoted = function (c) {
    if (!isQuote(c)) {
      this.attributeValue.push(c | 0x20);
      return;
    }
    var closesAttribute = c === this.quoteCharacter;
    if (!closesAttribute) {
      // Record the label only when the inner quotes were matched.
      this.handleMetaContentValue();
    }
    this.state = State.AttributeValueQuoted;
    this.stateAttributeValueQuoted(c);
  };
  /**
   * Quoted `content` value, before `charset` has been found. The closing
   * quote is handed to the generic quoted-value handler; other bytes feed
   * the search for `charset`.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateMetaContentValueQuotedBeforeEncoding = function (c) {
    if (c === this.quoteCharacter) {
      this.stateAttributeValueQuoted(c);
      return;
    }
    var wordComplete = this.findMetaContentEncoding(c);
    if (wordComplete) {
      this.state = State.MetaContentValueQuotedAfterEncoding;
    }
  };
  /**
   * Quoted `content` value, after the word `charset`: whitespace is skipped
   * and `=` moves on. Any other byte restarts the search for `charset`.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateMetaContentValueQuotedAfterEncoding = function (c) {
    if (c === Chars.EQUALS) {
      this.state = State.MetaContentValueQuotedBeforeValue;
      return;
    }
    if (SPACE_CHARACTERS.has(c)) {
      return;
    }
    // Look for the next occurrence of `charset`.
    this.state = State.MetaContentValueQuotedBeforeEncoding;
    this.stateMetaContentValueQuotedBeforeEncoding(c);
  };
  /**
   * Quoted `content` value, after `charset=`: whitespace is skipped. The
   * attribute's own quote ends the value, the other quote opens a quoted
   * label, and any other byte starts an unquoted label.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateMetaContentValueQuotedBeforeValue = function (c) {
    if (c === this.quoteCharacter) {
      this.stateAttributeValueQuoted(c);
      return;
    }
    if (isQuote(c)) {
      this.state = State.MetaContentValueQuotedValueQuoted;
      return;
    }
    if (SPACE_CHARACTERS.has(c)) {
      return;
    }
    this.state = State.MetaContentValueQuotedValueUnquoted;
    this.stateMetaContentValueQuotedValueUnquoted(c);
  };
  /**
   * Reads a quoted attribute value. The matching quote ends it; other bytes
   * are collected (with bit 0x20 set) only for the `charset` attribute.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateAttributeValueQuoted = function (c) {
    var closesValue = c === this.quoteCharacter;
    if (closesValue) {
      this.handleAttributeValue();
      this.state = State.BeforeAttribute;
      return;
    }
    if (this.attribType === AttribType.Charset) {
      this.attributeValue.push(c | 0x20);
    }
  };
  /**
   * Matches the rest of `STRINGS.XML_DECLARATION` (`<?xml`).
   *
   * On a mismatch the construct is a weird tag. The mismatching byte is
   * reprocessed there, as `stateCommentStart` does, so that a `>` (as in
   * `<?x>`) closes the construct instead of being swallowed.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateXMLDeclaration = function (c) {
    if (this.advanceSection(exports.STRINGS.XML_DECLARATION, c)) {
      if (this.sectionIndex === exports.STRINGS.XML_DECLARATION.length) {
        this.sectionIndex = 0;
        this.state = State.XMLDeclarationBeforeEncoding;
      }
    }
    else {
      this.state = State.WeirdTag;
      this.stateWeirdTag(c);
    }
  };
  /**
   * Inside an XML declaration: searches for the word `encoding`
   * (case-sensitively). A `>` ends the declaration.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateXMLDeclarationBeforeEncoding = function (c) {
    var word = exports.STRINGS.ENCODING;
    if (!this.advanceSection(word, c)) {
      if (c === Chars.GT) {
        this.state = State.BeforeTag;
      }
      else {
        // The mismatching byte may itself start the word; restart from it.
        this.sectionIndex = Number(c === word[0]);
      }
      return;
    }
    if (this.sectionIndex === word.length) {
      this.state = State.XMLDeclarationAfterEncoding;
    }
  };
  /**
   * After `encoding` in an XML declaration: `=` moves on and bytes up to and
   * including space are skipped. Anything else turns this into a weird tag.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateXMLDeclarationAfterEncoding = function (c) {
    if (c === Chars.EQUALS) {
      this.state = State.XMLDeclarationBeforeValue;
      return;
    }
    if (c > Chars.SPACE) {
      this.state = State.WeirdTag;
      this.stateWeirdTag(c);
    }
  };
  /**
   * After `encoding=` in an XML declaration: a quote starts the value and
   * bytes up to and including space are skipped. Anything else turns this
   * into a weird tag.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateXMLDeclarationBeforeValue = function (c) {
    if (isQuote(c)) {
      this.attributeValue.length = 0;
      this.state = State.XMLDeclarationValue;
      return;
    }
    if (c > Chars.SPACE) {
      this.state = State.WeirdTag;
      this.stateWeirdTag(c);
    }
  };
  /**
   * Collects the encoding label of an XML declaration.
   *
   * A quote ends the label and applies it as an `XML_ENCODING` result. A `>`
   * ends the declaration; a byte up to and including space abandons the
   * label. Other bytes are collected with bit 0x20 set.
   *
   * @param c Current byte.
   */
  Sniffer.prototype.stateXMLDeclarationValue = function (c) {
    if (isQuote(c)) {
      var label = String.fromCharCode.apply(String, this.attributeValue);
      this.setResult(label, ResultType.XML_ENCODING);
      this.state = State.WeirdTag;
      return;
    }
    if (c === Chars.GT) {
      this.state = State.BeforeTag;
      return;
    }
    if (c <= Chars.SPACE) {
      this.state = State.WeirdTag;
      return;
    }
    this.attributeValue.push(c | 0x20);
  };
  /**
   * Feed a chunk of bytes through the state machine.
   *
   * Bytes are dispatched one at a time to the handler of the current state.
   * Processing stops at the end of `buffer`, or as soon as
   * `this.offset + index` reaches `this.maxBytes`. Afterwards `this.offset`
   * is advanced by the number of bytes consumed from this chunk.
   *
   * @param buffer Indexable byte sequence providing `length` and
   *     `indexOf(value, fromIndex)` (presumably a `Uint8Array`/`Buffer`).
   */
  Sniffer.prototype.write = function (buffer) {
      var index = 0;
      for (; index < buffer.length && this.offset + index < this.maxBytes; index++) {
          var c = buffer[index];
          switch (this.state) {
              case State.Begin: {
                  this.stateBegin(c);
                  break;
              }
              case State.BOM16BE: {
                  this.stateBOM16BE(c);
                  break;
              }
              case State.BOM16LE: {
                  this.stateBOM16LE(c);
                  break;
              }
              case State.BOM8: {
                  this.stateBOM8(c);
                  break;
              }
              case State.UTF16LE_XML_PREFIX: {
                  this.stateUTF16LE_XML_PREFIX(c);
                  break;
              }
              case State.BeginLT: {
                  this.stateBeginLT(c);
                  break;
              }
              case State.UTF16BE_XML_PREFIX: {
                  this.stateUTF16BE_XML_PREFIX(c);
                  break;
              }
              case State.BeforeTag: {
                  // Optimization: Skip all characters until we find a `<`
                  var idx = buffer.indexOf(Chars.LT, index);
                  if (idx < 0) {
                      // We are done with this buffer. Stay in the state and try on the next one.
                      // Park on the last byte so that the loop's `index++` lands exactly on
                      // `buffer.length`; jumping straight to `buffer.length` would make
                      // `this.offset` overshoot by one.
                      index = buffer.length - 1;
                  }
                  else {
                      index = idx;
                      this.stateBeforeTag(Chars.LT);
                  }
                  break;
              }
              case State.BeforeTagName: {
                  this.stateBeforeTagName(c);
                  break;
              }
              case State.BeforeCloseTagName: {
                  this.stateBeforeCloseTagName(c);
                  break;
              }
              case State.CommentStart: {
                  this.stateCommentStart(c);
                  break;
              }
              case State.CommentEnd: {
                  this.stateCommentEnd(c);
                  break;
              }
              case State.TagNameMeta: {
                  this.stateTagNameMeta(c);
                  break;
              }
              case State.TagNameOther: {
                  this.stateTagNameOther(c);
                  break;
              }
              case State.XMLDeclaration: {
                  this.stateXMLDeclaration(c);
                  break;
              }
              case State.XMLDeclarationBeforeEncoding: {
                  this.stateXMLDeclarationBeforeEncoding(c);
                  break;
              }
              case State.XMLDeclarationAfterEncoding: {
                  this.stateXMLDeclarationAfterEncoding(c);
                  break;
              }
              case State.XMLDeclarationBeforeValue: {
                  this.stateXMLDeclarationBeforeValue(c);
                  break;
              }
              case State.XMLDeclarationValue: {
                  this.stateXMLDeclarationValue(c);
                  break;
              }
              case State.WeirdTag: {
                  this.stateWeirdTag(c);
                  break;
              }
              case State.BeforeAttribute: {
                  this.stateBeforeAttribute(c);
                  break;
              }
              case State.MetaAttribHttpEquiv: {
                  this.stateMetaAttribHttpEquiv(c);
                  break;
              }
              case State.MetaAttribHttpEquivValue: {
                  this.stateMetaAttribHttpEquivValue(c);
                  break;
              }
              case State.MetaAttribC: {
                  this.stateMetaAttribC(c);
                  break;
              }
              case State.MetaAttribContent: {
                  this.stateMetaAttribContent(c);
                  break;
              }
              case State.MetaAttribCharset: {
                  this.stateMetaAttribCharset(c);
                  break;
              }
              case State.MetaAttribAfterName: {
                  this.stateMetaAttribAfterName(c);
                  break;
              }
              case State.MetaContentValueQuotedBeforeEncoding: {
                  this.stateMetaContentValueQuotedBeforeEncoding(c);
                  break;
              }
              case State.MetaContentValueQuotedAfterEncoding: {
                  this.stateMetaContentValueQuotedAfterEncoding(c);
                  break;
              }
              case State.MetaContentValueQuotedBeforeValue: {
                  this.stateMetaContentValueQuotedBeforeValue(c);
                  break;
              }
              case State.MetaContentValueQuotedValueQuoted: {
                  this.stateMetaContentValueQuotedValueQuoted(c);
                  break;
              }
              case State.MetaContentValueQuotedValueUnquoted: {
                  this.stateMetaContentValueQuotedValueUnquoted(c);
                  break;
              }
              case State.MetaContentValueUnquotedBeforeEncoding: {
                  this.stateMetaContentValueUnquotedBeforeEncoding(c);
                  break;
              }
              case State.MetaContentValueUnquotedBeforeValue: {
                  this.stateMetaContentValueUnquotedBeforeValue(c);
                  break;
              }
              case State.MetaContentValueUnquotedValueQuoted: {
                  this.stateMetaContentValueUnquotedValueQuoted(c);
                  break;
              }
              case State.MetaContentValueUnquotedValueUnquoted: {
                  this.stateMetaContentValueUnquotedValueUnquoted(c);
                  break;
              }
              case State.AnyAttribName: {
                  this.stateAnyAttribName(c);
                  break;
              }
              case State.AfterAttributeName: {
                  this.stateAfterAttributeName(c);
                  break;
              }
              case State.BeforeAttributeValue: {
                  this.stateBeforeAttributeValue(c);
                  break;
              }
              case State.AttributeValueQuoted: {
                  this.stateAttributeValueQuoted(c);
                  break;
              }
              default: {
                  // (State.AttributeValueUnquoted)
                  this.stateAttributeValueUnquoted(c);
              }
          }
      }
      this.offset += index;
  };
  982. return Sniffer;
  983. }());
  984. exports.Sniffer = Sniffer;
  /**
   * Get the encoding for the passed buffer.
   *
   * Creates a fresh `Sniffer` with `options`, writes the whole buffer to it
   * once and returns the sniffer's `encoding` property.
   *
   * @param buffer Bytes to inspect; passed unchanged to `Sniffer#write`.
   * @param options Options passed unchanged to the `Sniffer` constructor.
   * @returns The `encoding` property of the sniffer after the write.
   */
  function getEncoding(buffer, options) {
      var detector = new Sniffer(options);
      detector.write(buffer);
      return detector.encoding;
  }
  exports.getEncoding = getEncoding;
  992. //# sourceMappingURL=sniffer.js.map