// Parser.js (12 KB)

  1. "use strict";
  /**
   * Compiler-emitted helper: exposes property `k` of module object `m` on
   * object `o` under the name `k2` (defaults to `k`).
   *
   * A previously installed helper on `this` is reused when present.
   */
  var __createBinding = (this && this.__createBinding);
  if (!__createBinding) {
    if (Object.create) {
      __createBinding = function (o, m, k, k2) {
        if (k2 === undefined) {
          k2 = k;
        }
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var useLiveGetter;
        if (!descriptor) {
          useLiveGetter = true;
        } else if ("get" in descriptor) {
          // Accessor on a non-ES module: wrap it rather than copying it.
          useLiveGetter = !m.__esModule;
        } else {
          // Data property that can still change: forward reads to the source.
          useLiveGetter = descriptor.writable || descriptor.configurable;
        }
        if (useLiveGetter) {
          descriptor = {
            enumerable: true,
            get: function () {
              return m[k];
            }
          };
        }
        Object.defineProperty(o, k2, descriptor);
      };
    } else {
      // No Object.create available: fall back to a plain value copy.
      __createBinding = function (o, m, k, k2) {
        if (k2 === undefined) {
          k2 = k;
        }
        o[k2] = m[k];
      };
    }
  }
  /**
   * Compiler-emitted helper: stores `v` as the "default" member of `o`.
   *
   * A previously installed helper on `this` is reused when present.
   */
  var __setModuleDefault = (this && this.__setModuleDefault);
  if (!__setModuleDefault) {
    if (Object.create) {
      __setModuleDefault = function (o, v) {
        var defaultDescriptor = { enumerable: true, value: v };
        Object.defineProperty(o, "default", defaultDescriptor);
      };
    } else {
      // Without Object.create, plain assignment is used instead.
      __setModuleDefault = function (o, v) {
        o["default"] = v;
      };
    }
  }
  /**
   * Compiler-emitted helper for `import * as ns` of a CommonJS module.
   *
   * ES modules (flagged with `__esModule`) are returned untouched; anything
   * else is wrapped in a fresh namespace object whose own non-"default" keys
   * are bound to the module and whose "default" member is the module itself.
   */
  var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
      return mod;
    }
    var namespace = {};
    var owns = Object.prototype.hasOwnProperty;
    if (mod != null) {
      for (var key in mod) {
        if (key === "default") {
          continue;
        }
        if (owns.call(mod, key)) {
          __createBinding(namespace, mod, key);
        }
      }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
  };
  /**
   * Compiler-emitted helper for array destructuring: collects up to `n`
   * values (all of them when `n` is undefined) from iterable `o` into an
   * array. A value without an iterator is returned unchanged.
   *
   * If iteration stops before the iterator reports completion, the
   * iterator's "return" method (when it has one) is invoked; an error raised
   * while reading is rethrown after that clean-up.
   */
  var __read = (this && this.__read) || function (o, n) {
    var iteratorFactory = typeof Symbol === "function" && o[Symbol.iterator];
    if (!iteratorFactory) {
      return o;
    }
    var iterator = iteratorFactory.call(o);
    var collected = [];
    var step;
    var failure;
    try {
      while (n === void 0 || n-- > 0) {
        step = iterator.next();
        if (step.done) {
          break;
        }
        collected.push(step.value);
      }
    }
    catch (error) {
      failure = { error: error };
    }
    finally {
      try {
        if (step && !step.done) {
          var closeIterator = iterator["return"];
          if (closeIterator) {
            closeIterator.call(iterator);
          }
        }
      }
      finally {
        if (failure) {
          throw failure.error;
        }
      }
    }
    return collected;
  };
  /**
   * Compiler-emitted helper for `for...of`: returns an iterator over `o`.
   *
   * Uses the object's own Symbol.iterator when available, otherwise walks an
   * array-like (numeric `length`) by index. Throws a TypeError for anything
   * else.
   */
  var __values = (this && this.__values) || function (o) {
    var s = typeof Symbol === "function" && Symbol.iterator;
    var m = s && o[s];
    if (m) {
      return m.call(o);
    }
    if (o && typeof o.length === "number") {
      var i = 0;
      return {
        next: function () {
          // Drop the reference once the end is reached so later calls stay done.
          if (o && i >= o.length) {
            o = void 0;
          }
          if (!o) {
            return { value: o, done: true };
          }
          return { value: o[i++], done: false };
        }
      };
    }
    throw new TypeError(s ? "Object is not iterable." : "Symbol.iterator is not defined.");
  };
  52. Object.defineProperty(exports, "__esModule", { value: true });
  53. exports.LiteParser = exports.PATTERNS = void 0;
  54. var Entities = __importStar(require("../../util/Entities.js"));
  55. var Element_js_1 = require("./Element.js");
  56. var Text_js_1 = require("./Text.js");
  /**
   * Regular-expression source fragments (upper-case members) and the
   * case-insensitive RegExp objects compiled from them (lower-case members)
   * that the parser uses to split markup into tags and attributes.
   */
  var PATTERNS;
  (function (P) {
    // Basic building blocks.
    P.TAGNAME = '[a-z][^\\s\\n>]*';
    P.ATTNAME = '[a-z][^\\s\\n>=]*';
    P.VALUE = "(?:'[^']*'|\"[^\"]*\"|[^\\s\\n]+)";
    P.VALUESPLIT = "(?:'([^']*)'|\"([^\"]*)\"|([^\\s\\n]+))";
    P.SPACE = '(?:\\s|\\n)+';
    P.OPTIONALSPACE = '(?:\\s|\\n)*';

    // "=" with optional surrounding white space, shared by both attribute forms.
    var assignment = P.OPTIONALSPACE + '=' + P.OPTIONALSPACE;

    // An attribute, without and with capture groups for name and value.
    P.ATTRIBUTE = [P.ATTNAME, '(?:', assignment, P.VALUE, ')?'].join('');
    P.ATTRIBUTESPLIT = ['(', P.ATTNAME, ')(?:', assignment, P.VALUESPLIT, ')?'].join('');

    // An open tag, a close tag, a comment, or another "<!" construct.
    P.TAG = [
      '(<(?:',
      P.TAGNAME,
      '(?:', P.SPACE, P.ATTRIBUTE, ')*',
      P.OPTIONALSPACE,
      '/?|/',
      P.TAGNAME,
      '|!--[^]*?--|![^]*?)(?:>|$))'
    ].join('');

    // Compiled forms.
    P.tag = new RegExp(P.TAG, 'i');
    P.attr = new RegExp(P.ATTRIBUTE, 'i');
    P.attrsplit = new RegExp(P.ATTRIBUTESPLIT, 'i');
  })(PATTERNS = exports.PATTERNS || (exports.PATTERNS = {}));
  /**
   * A small markup parser/serializer that builds its tree exclusively through
   * an adaptor object (createDocument, body, node, text, append, parent, kind,
   * childNodes, ...) supplied by the caller.
   */
  var LiteParser = (function () {
    function LiteParser() {
    }

    /**
     * Parse a string of markup into a document obtained from the adaptor.
     *
     * @param {string} text      The markup to parse
     * @param {string} _format   Unused; defaults to 'text/html'
     * @param {Object} adaptor   The adaptor used to create and link nodes
     *                           (required in practice: it is dereferenced immediately)
     * @return {Object}          The document returned by adaptor.createDocument()
     */
    LiteParser.prototype.parseFromString = function (text, _format, adaptor) {
      if (_format === void 0) { _format = 'text/html'; }
      if (adaptor === void 0) { adaptor = null; }
      var root = adaptor.createDocument();
      var node = adaptor.body(root);
      // Strip processing instructions. [^] (rather than .) lets an instruction
      // span several lines, so it cannot leak into the document as text.
      var parts = text.replace(/<\?[^]*?\?>/g, '').split(PATTERNS.tag);
      // split() with one capture group yields text, tag, text, tag, ..., text.
      while (parts.length) {
        var text_1 = parts.shift();
        var tag = parts.shift();
        if (text_1) {
          this.addText(adaptor, node, text_1);
        }
        // A tag that does not end in ">" ran into the end of the input and is dropped.
        if (tag && tag.charAt(tag.length - 1) === '>') {
          if (tag.charAt(1) === '!') {
            this.addComment(adaptor, node, tag);
          }
          else if (tag.charAt(1) === '/') {
            node = this.closeTag(adaptor, node, tag);
          }
          else {
            node = this.openTag(adaptor, node, tag, parts);
          }
        }
      }
      this.checkDocument(adaptor, root);
      return root;
    };

    /**
     * Append entity-translated text to a node.
     *
     * @param {Object} adaptor   The adaptor for managing nodes
     * @param {Object} node      The node that receives the text
     * @param {string} text      The raw text (entities are translated)
     * @return {*}               Whatever adaptor.append() returns
     */
    LiteParser.prototype.addText = function (adaptor, node, text) {
      text = Entities.translate(text);
      return adaptor.append(node, adaptor.text(text));
    };

    /**
     * Append a comment (or other "<!...>" construct) to a node.
     *
     * @param {Object} adaptor   The adaptor for managing nodes
     * @param {Object} node      The node that receives the comment
     * @param {string} comment   The complete comment text, delimiters included
     * @return {*}               Whatever adaptor.append() returns
     */
    LiteParser.prototype.addComment = function (adaptor, node, comment) {
      return adaptor.append(node, new Text_js_1.LiteComment(comment));
    };

    /**
     * Handle an end tag: find the nearest enclosing element of that kind
     * (starting with the current node) and continue in its parent.
     *
     * An end tag that matches no open element is ignored, and the top-most
     * node is never left, so the current node can not become null.
     *
     * @param {Object} adaptor   The adaptor for managing nodes
     * @param {Object} node      The current node
     * @param {string} tag       The end tag, e.g. "</div>"
     * @return {Object}          The node that becomes current
     */
    LiteParser.prototype.closeTag = function (adaptor, node, tag) {
      var kind = tag.slice(2, tag.length - 1).toLowerCase();
      var target = node;
      while (adaptor.parent(target) && adaptor.kind(target) !== kind) {
        target = adaptor.parent(target);
      }
      if (adaptor.kind(target) !== kind) {
        // Stray end tag: nothing to close, stay where we are.
        return node;
      }
      return adaptor.parent(target) || target;
    };

    /**
     * Handle a start tag: create the element, add its attributes, append it to
     * the current node, and descend into it unless it is self-closing or
     * holds PCDATA (whose content is consumed from `parts` immediately).
     *
     * @param {Object} adaptor    The adaptor for managing nodes
     * @param {Object} node       The current node
     * @param {string} tag        The complete start tag
     * @param {string[]} parts    The remaining text/tag pieces of the input
     * @return {Object}           The node that becomes current
     */
    LiteParser.prototype.openTag = function (adaptor, node, tag, parts) {
      var PCDATA = this.constructor.PCDATA;
      var SELF_CLOSING = this.constructor.SELF_CLOSING;
      var kind = tag.match(/<(.*?)[\s\n>\/]/)[1].toLowerCase();
      var child = adaptor.node(kind);
      // Drop the tag name, then split the rest into groups of
      // [filler, name, 'value', "value", value] followed by the trailing text.
      var attributes = tag.replace(/^<.*?[\s\n>]/, '').split(PATTERNS.attrsplit);
      if (attributes.pop().match(/>$/) || attributes.length < 5) {
        this.addAttributes(adaptor, child, attributes);
        adaptor.append(node, child);
        if (!SELF_CLOSING[kind] && !tag.match(/\/>$/)) {
          if (PCDATA[kind]) {
            this.handlePCDATA(adaptor, child, kind, parts);
          }
          else {
            node = child;
          }
        }
      }
      return node;
    };

    /**
     * Set the attributes found in a start tag on a node. Values are
     * entity-translated unless the attribute is listed in CDATA_ATTR.
     *
     * @param {Object} adaptor        The adaptor for managing nodes
     * @param {Object} node           The node receiving the attributes
     * @param {string[]} attributes   Groups of five entries per attribute as
     *                                produced by PATTERNS.attrsplit (consumed)
     */
    LiteParser.prototype.addAttributes = function (adaptor, node, attributes) {
      var CDATA_ATTR = this.constructor.CDATA_ATTR;
      while (attributes.length) {
        var group = attributes.splice(0, 5);
        var name_1 = group[1];
        // Only one of the three value captures can be filled in.
        var value = group[2] || group[3] || group[4] || '';
        if (!CDATA_ATTR[name_1]) {
          value = Entities.translate(value);
        }
        adaptor.setAttribute(node, name_1, value);
      }
    };

    /**
     * Collect everything up to the matching end tag as one verbatim text node.
     * The end tag is matched case-insensitively because `kind` is lower-cased
     * while the tag text keeps the author's spelling.
     *
     * @param {Object} adaptor    The adaptor for managing nodes
     * @param {Object} node       The PCDATA element being filled
     * @param {string} kind       The element's kind
     * @param {string[]} parts    The remaining text/tag pieces (consumed up to
     *                            and including the end tag)
     */
    LiteParser.prototype.handlePCDATA = function (adaptor, node, kind, parts) {
      var pcdata = [];
      var etag = ('</' + kind + '>').toLowerCase();
      var ptag = '';
      while (parts.length && (ptag || '').toLowerCase() !== etag) {
        pcdata.push(ptag);
        pcdata.push(parts.shift());
        ptag = parts.shift();
      }
      adaptor.append(node, adaptor.text(pcdata.join('')));
    };

    /**
     * If the parsed body holds exactly one element and it is an html, head, or
     * body element, install it as the document's corresponding part. A DOCTYPE
     * comment preceding that element is recorded as root.type.
     *
     * @param {Object} adaptor   The adaptor for managing nodes
     * @param {Object} root      The document being finalized
     */
    LiteParser.prototype.checkDocument = function (adaptor, root) {
      var firstError, closeFirst, secondError, closeSecond;
      var node = this.getOnlyChild(adaptor, adaptor.body(root));
      if (!node) {
        return;
      }
      // Look for a DOCTYPE among the nodes in front of the single element.
      try {
        for (var bodyIter = __values(adaptor.childNodes(adaptor.body(root))), bodyStep = bodyIter.next(); !bodyStep.done; bodyStep = bodyIter.next()) {
          var child = bodyStep.value;
          if (child === node) {
            break;
          }
          if (child instanceof Text_js_1.LiteComment && child.value.match(/^<!DOCTYPE/)) {
            root.type = child.value;
          }
        }
      }
      catch (caught) { firstError = { error: caught }; }
      finally {
        try {
          if (bodyStep && !bodyStep.done && (closeFirst = bodyIter.return)) closeFirst.call(bodyIter);
        }
        finally { if (firstError) throw firstError.error; }
      }
      switch (adaptor.kind(node)) {
        case 'html':
          // Adopt the parsed head and body, when present.
          try {
            for (var htmlIter = __values(node.children), htmlStep = htmlIter.next(); !htmlStep.done; htmlStep = htmlIter.next()) {
              var part = htmlStep.value;
              switch (adaptor.kind(part)) {
                case 'head':
                  root.head = part;
                  break;
                case 'body':
                  root.body = part;
                  break;
              }
            }
          }
          catch (caught) { secondError = { error: caught }; }
          finally {
            try {
              if (htmlStep && !htmlStep.done && (closeSecond = htmlIter.return)) closeSecond.call(htmlIter);
            }
            finally { if (secondError) throw secondError.error; }
          }
          root.root = node;
          adaptor.remove(node);
          // Re-use the document's original body/head if the parsed html lacked one.
          if (adaptor.parent(root.body) !== node) {
            adaptor.append(node, root.body);
          }
          if (adaptor.parent(root.head) !== node) {
            adaptor.insert(root.head, root.body);
          }
          break;
        case 'head':
          root.head = adaptor.replace(node, root.head);
          break;
        case 'body':
          root.body = adaptor.replace(node, root.body);
          break;
      }
    };

    /**
     * Find the single element child of a node.
     *
     * @param {Object} adaptor   The adaptor for managing nodes
     * @param {Object} body      The node whose children are examined
     * @return {Object|null}     The only LiteElement child, or null when
     *                           there are none or more than one
     */
    LiteParser.prototype.getOnlyChild = function (adaptor, body) {
      var iterError, closeIter;
      var node = null;
      try {
        for (var iter = __values(adaptor.childNodes(body)), step = iter.next(); !step.done; step = iter.next()) {
          var child = step.value;
          if (child instanceof Element_js_1.LiteElement) {
            if (node)
              return null;
            node = child;
          }
        }
      }
      catch (caught) { iterError = { error: caught }; }
      finally {
        try {
          if (step && !step.done && (closeIter = iter.return)) closeIter.call(iter);
        }
        finally { if (iterError) throw iterError.error; }
      }
      return node;
    };

    /**
     * Serialize an element, its attributes, and its content.
     *
     * @param {Object} adaptor   The adaptor for managing nodes
     * @param {Object} node      The element to serialize
     * @param {boolean} xml      True to close empty/self-closing elements with "/>"
     * @return {string}          The serialized markup
     */
    LiteParser.prototype.serialize = function (adaptor, node, xml) {
      var _this = this;
      if (xml === void 0) { xml = false; }
      var SELF_CLOSING = this.constructor.SELF_CLOSING;
      var CDATA = this.constructor.CDATA_ATTR;
      var tag = adaptor.kind(node);
      var attributes = adaptor.allAttributes(node).map(function (x) {
        // CDATA attributes are emitted verbatim, matching how they were parsed.
        return x.name + '="' + (CDATA[x.name] ? x.value : _this.protectAttribute(x.value)) + '"';
      }).join(' ');
      var content = this.serializeInner(adaptor, node, xml);
      var html = '<' + tag + (attributes ? ' ' + attributes : '')
        + ((!xml || content) && !SELF_CLOSING[tag] ? ">".concat(content, "</").concat(tag, ">") : xml ? '/>' : '>');
      return html;
    };

    /**
     * Serialize the children of a node. Children of PCDATA elements are
     * emitted verbatim; otherwise text is escaped, comments are copied, and
     * elements are serialized recursively.
     *
     * @param {Object} adaptor   The adaptor for managing nodes
     * @param {Object} node      The node whose content is serialized
     * @param {boolean} xml      Passed on to serialize() for child elements
     * @return {string}          The serialized content
     */
    LiteParser.prototype.serializeInner = function (adaptor, node, xml) {
      var _this = this;
      if (xml === void 0) { xml = false; }
      var PCDATA = this.constructor.PCDATA;
      // Ask the adaptor for the kind, as serialize() does, instead of reading the node.
      if (Object.prototype.hasOwnProperty.call(PCDATA, adaptor.kind(node))) {
        return adaptor.childNodes(node).map(function (x) { return adaptor.value(x); }).join('');
      }
      return adaptor.childNodes(node).map(function (x) {
        var kind = adaptor.kind(x);
        return (kind === '#text' ? _this.protectHTML(adaptor.value(x)) :
          kind === '#comment' ? x.value :
            _this.serialize(adaptor, x, xml));
      }).join('');
    };

    /**
     * Make a value safe for use inside a double-quoted attribute.
     *
     * @param {*} text     The attribute value (converted to a string if needed)
     * @return {string}    The value with double quotes replaced by &quot;
     */
    LiteParser.prototype.protectAttribute = function (text) {
      if (typeof text !== 'string') {
        text = String(text);
      }
      return text.replace(/"/g, '&quot;');
    };

    /**
     * Escape the characters that are special in markup text.
     *
     * @param {string} text   The text to protect
     * @return {string}       The text with &, <, and > replaced by entities
     */
    LiteParser.prototype.protectHTML = function (text) {
      return text.replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');
    };

    /** Element kinds that never get content or an end tag. */
    LiteParser.SELF_CLOSING = {
      area: true,
      base: true,
      br: true,
      col: true,
      command: true,
      embed: true,
      hr: true,
      img: true,
      input: true,
      keygen: true,
      link: true,
      menuitem: true,
      meta: true,
      param: true,
      source: true,
      track: true,
      wbr: true
    };

    /** Element kinds whose content is kept verbatim up to their end tag. */
    LiteParser.PCDATA = {
      option: true,
      textarea: true,
      fieldset: true,
      title: true,
      style: true,
      script: true
    };

    /** Attribute names whose values are neither entity-translated nor escaped. */
    LiteParser.CDATA_ATTR = {
      style: true,
      datafld: true,
      datasrc: true,
      href: true,
      src: true,
      longdesc: true,
      usemap: true,
      cite: true,
      datetime: true,
      action: true,
      axis: true,
      profile: true,
      content: true,
      scheme: true
    };
    return LiteParser;
  }());
  325. exports.LiteParser = LiteParser;
  326. //# sourceMappingURL=Parser.js.map