index.js 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. exports.ParserStream = void 0;
  4. const node_stream_1 = require("node:stream");
  5. const parse5_1 = require("parse5");
  6. /* eslint-disable unicorn/consistent-function-scoping -- The rule seems to be broken here. */
  7. /**
  8. * Streaming HTML parser with scripting support.
  9. * A [writable stream](https://nodejs.org/api/stream.html#stream_class_stream_writable).
  10. *
  11. * @example
  12. *
  13. * ```js
  14. * const ParserStream = require('parse5-parser-stream');
  15. * const http = require('http');
  16. * const { finished } = require('node:stream');
  17. *
  18. * // Fetch the page content and obtain its <head> node
  19. * http.get('http://inikulin.github.io/parse5/', res => {
  20. * const parser = new ParserStream();
  21. *
  22. * finished(parser, () => {
  23. * console.log(parser.document.childNodes[1].childNodes[0].tagName); //> 'head'
  24. * });
  25. *
  26. * res.pipe(parser);
  27. * });
  28. * ```
  29. *
  30. */
  class ParserStream extends node_stream_1.Writable {
      /**
       * Creates a stream backed by a fragment parser instead of a document parser.
       *
       * @param fragmentContext Forwarded unchanged to `Parser.getFragmentParser`.
       * @param options Parsing options, forwarded to both the parser and the stream.
       * @returns A new `ParserStream` wrapping the fragment parser.
       */
      static getFragmentStream(fragmentContext, options) {
          const parser = parse5_1.Parser.getFragmentParser(fragmentContext, options);
          return new ParserStream(options, parser);
      }
      /** The resulting document node. */
      get document() {
          return this.parser.document;
      }
      /**
       * Returns whatever the underlying parser's `getFragment()` returns.
       */
      getFragment() {
          return this.parser.getFragment();
      }
      /**
       * @param options Parsing options.
       * @param parser Parser instance to drive; a new `Parser(options)` is created when omitted.
       */
      constructor(options, parser = new parse5_1.Parser(options)) {
          // `decodeStrings: false` keeps string chunks as strings when they reach `_write`.
          super({ decodeStrings: false });
          this.parser = parser;
          // Set once `end()` has been called; passed to the tokenizer with every write.
          this.lastChunkWritten = false;
          // Callback of the most recent `_write`, reused when a script handler resumes parsing.
          this.writeCallback = undefined;
          // HTML queued through `documentWrite` while a `script` listener is running.
          this.pendingHtmlInsertions = [];
          const resume = () => {
              // Iterate backwards: every insertion happens at the tokenizer's current
              // position, so (presumably) the last call ends up first in the input.
              for (let i = this.pendingHtmlInsertions.length - 1; i >= 0; i--) {
                  this.parser.tokenizer.insertHtmlAtCurrentPos(this.pendingHtmlInsertions[i]);
              }
              this.pendingHtmlInsertions.length = 0;
              // NOTE: keep parsing if we don't wait for the next input chunk.
              this.parser.tokenizer.resume(this.writeCallback);
          };
          const documentWrite = (html) => {
              // Insertions are dropped once the parser reports it has stopped.
              if (!this.parser.stopped) {
                  this.pendingHtmlInsertions.push(html);
              }
          };
          const scriptHandler = (scriptElement) => {
              // Only pause when someone is listening; otherwise nobody would call `resume`.
              if (this.listenerCount('script') > 0) {
                  this.parser.tokenizer.pause();
                  this.emit('script', scriptElement, documentWrite, resume);
              }
          };
          this.parser.scriptHandler = scriptHandler;
      }
      // WritableStream implementation
      /**
       * Feeds one chunk to the tokenizer.
       *
       * @param chunk The chunk to parse; must be a string.
       * @param _encoding Unused.
       * @param callback Write-completion callback, handed to the tokenizer.
       * @throws {TypeError} Synchronously, when `chunk` is not a string.
       */
      _write(chunk, _encoding, callback) {
          if (typeof chunk !== 'string') {
              throw new TypeError('Parser can work only with string streams.');
          }
          this.writeCallback = callback;
          this.parser.tokenizer.write(chunk, this.lastChunkWritten, this.writeCallback);
      }
      // TODO [engine:node@>=16]: Due to issues with Node < 16, we are overriding `end` instead of `_final`.
      /**
       * Ends the stream, always writing a final (possibly empty) chunk so that the
       * tokenizer receives a write with the last-chunk flag set.
       *
       * Accepts the same overloads as `Writable.prototype.end`: `end()`,
       * `end(callback)`, `end(chunk, callback)` and `end(chunk, encoding, callback)`.
       *
       * @param chunk Optional final chunk; an empty string is written when it is falsy.
       * @param encoding Optional encoding of `chunk`.
       * @param callback Optional callback, forwarded to `Writable.prototype.end`.
       * @returns This stream, as returned by `Writable.prototype.end`.
       */
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      end(chunk, encoding, callback) {
          let finalChunk = chunk;
          let finalEncoding = encoding;
          let onFinish = callback;
          // Normalise the callback-first overloads. Without this, `end(callback)` would
          // forward the function as the chunk, no final chunk would be written, and the
          // tokenizer would never see the last-chunk flag.
          if (typeof finalChunk === 'function') {
              onFinish = finalChunk;
              finalChunk = undefined;
              finalEncoding = undefined;
          }
          else if (typeof finalEncoding === 'function') {
              onFinish = finalEncoding;
              finalEncoding = undefined;
          }
          this.lastChunkWritten = true;
          return super.end(finalChunk || '', finalEncoding, onFinish);
      }
  }
  89. exports.ParserStream = ParserStream;
  90. //# sourceMappingURL=index.js.map