// url-state-machine.js (32 KB)
  1. "use strict";
  2. const punycode = require("punycode");
  3. const tr46 = require("tr46");
  4. const infra = require("./infra");
  5. const { percentEncode, percentDecode } = require("./urlencoded");
  /**
   * Default ports of the schemes this module treats as "special".
   * `file` is special but has no default port, hence `null`.
   *
   * The table is built without a prototype so that a lookup such as
   * `specialSchemes["constructor"]` yields `undefined` rather than an inherited
   * `Object.prototype` member; scheme names are derived from parser input.
   */
  const specialSchemes = Object.assign(Object.create(null), {
    ftp: 21,
    file: null,
    http: 80,
    https: 443,
    ws: 80,
    wss: 443
  });

  /** Unique sentinel that the parsing helpers return to signal failure. */
  const failure = Symbol("failure");
  /**
   * Returns the number of entries `punycode.ucs2.decode` produces for `str`.
   */
  function countSymbols(str) {
    const codePoints = punycode.ucs2.decode(str);
    return codePoints.length;
  }
  /**
   * Returns the entry of `input` at `idx` as a one-code-point string, or
   * `undefined` when that entry is not a number (for example out of range).
   */
  function at(input, idx) {
    const codePoint = input[idx];
    if (isNaN(codePoint)) {
      return undefined;
    }
    return String.fromCodePoint(codePoint);
  }
  /** True when `buffer` is "." or its percent-encoded form "%2e" (any case). */
  function isSingleDot(buffer) {
    if (buffer === ".") {
      return true;
    }
    return buffer.toLowerCase() === "%2e";
  }
  /**
   * True when `buffer` is ".." with either dot optionally written as "%2e"
   * (compared case-insensitively).
   */
  function isDoubleDot(buffer) {
    switch (buffer.toLowerCase()) {
      case "..":
      case "%2e.":
      case ".%2e":
      case "%2e%2e":
        return true;
      default:
        return false;
    }
  }
  /**
   * Checks a pair of code points for the drive-letter shape: `cp1` must satisfy
   * `infra.isASCIIAlpha` and `cp2` must be ":" (58) or "|" (124).
   */
  function isWindowsDriveLetterCodePoints(cp1, cp2) {
    const secondIsColonOrBar = cp2 === 58 || cp2 === 124;
    return infra.isASCIIAlpha(cp1) && secondIsColonOrBar;
  }
  /**
   * True for a two-character string whose first character satisfies
   * `infra.isASCIIAlpha` and whose second character is ":" or "|".
   */
  function isWindowsDriveLetterString(string) {
    if (string.length !== 2) {
      return false;
    }
    const separator = string[1];
    return infra.isASCIIAlpha(string.codePointAt(0)) && (separator === ":" || separator === "|");
  }
  /**
   * True for a two-character string whose first character satisfies
   * `infra.isASCIIAlpha` and whose second character is exactly ":".
   */
  function isNormalizedWindowsDriveLetterString(string) {
    if (string.length !== 2) {
      return false;
    }
    return infra.isASCIIAlpha(string.codePointAt(0)) && string[1] === ":";
  }
  /**
   * Reports whether `string` contains any of: NUL, tab, LF, CR, space,
   * "#", "%", "/", ":", "?", "@", "[", "\" or "]".
   */
  function containsForbiddenHostCodePoint(string) {
    const forbidden = /\u0000|\u0009|\u000A|\u000D|\u0020|#|%|\/|:|\?|@|\[|\\|\]/;
    const firstMatch = string.search(forbidden);
    return firstMatch !== -1;
  }
  /**
   * Same check as `containsForbiddenHostCodePoint` but "%" is allowed:
   * reports whether `string` contains NUL, tab, LF, CR, space,
   * "#", "/", ":", "?", "@", "[", "\" or "]".
   */
  function containsForbiddenHostCodePointExcludingPercent(string) {
    const forbidden = /\u0000|\u0009|\u000A|\u000D|\u0020|#|\/|:|\?|@|\[|\\|\]/;
    const firstMatch = string.search(forbidden);
    return firstMatch !== -1;
  }
  /**
   * Reports whether `scheme` is one of the keys of `specialSchemes`.
   *
   * An own-property check is used instead of comparing the looked-up value with
   * `undefined`, so that names inherited from `Object.prototype` (notably
   * "constructor", which is a syntactically valid scheme) are not mistaken for
   * special schemes. `file` maps to `null` and is still reported as special.
   *
   * @param {string} scheme - Scheme name to test.
   * @returns {boolean} True when `scheme` is a special scheme.
   */
  function isSpecialScheme(scheme) {
    return Object.prototype.hasOwnProperty.call(specialSchemes, scheme);
  }
  /** True when the scheme of `url` is a special scheme. */
  function isSpecial(url) {
    const { scheme } = url;
    return isSpecialScheme(scheme);
  }
  /** Negation of `isSpecial`: true when the scheme of `url` is not special. */
  function isNotSpecial(url) {
    const special = isSpecialScheme(url.scheme);
    return !special;
  }
  /**
   * Looks up the default port recorded for `scheme` in `specialSchemes`.
   *
   * Only own entries of the table are consulted, so a scheme that merely
   * matches an inherited `Object.prototype` member (for example "constructor")
   * yields `undefined` instead of that member.
   *
   * @param {string} scheme - Scheme name to look up.
   * @returns {number|null|undefined} The port, `null` for `file`, or
   *   `undefined` when the scheme has no entry.
   */
  function defaultPort(scheme) {
    if (!Object.prototype.hasOwnProperty.call(specialSchemes, scheme)) {
      return undefined;
    }
    return specialSchemes[scheme];
  }
  /**
   * Converts `c` to bytes with `Buffer.from` and concatenates the result of
   * `percentEncode` for every byte.
   */
  function utf8PercentEncode(c) {
    let encoded = "";
    for (const byte of Buffer.from(c)) {
      encoded += percentEncode(byte);
    }
    return encoded;
  }
  /** True for code points at or below 0x1F, or above 0x7E. */
  function isC0ControlPercentEncode(c) {
    if (c <= 0x1F) {
      return true;
    }
    return c > 0x7E;
  }
  // Code points encoded in userinfo on top of the path set:
  // "/" ":" ";" "=" "@" "[" "\" "]" "^" "|"
  const extraUserinfoPercentEncodeSet = new Set([47, 58, 59, 61, 64, 91, 92, 93, 94, 124]);

  /** True when `c` is in the path encode set or in the userinfo extras. */
  function isUserinfoPercentEncode(c) {
    if (isPathPercentEncode(c)) {
      return true;
    }
    return extraUserinfoPercentEncodeSet.has(c);
  }
  // Code points encoded in fragments on top of the C0-control set:
  // space, '"', "<", ">", "`"
  const extraFragmentPercentEncodeSet = new Set([32, 34, 60, 62, 96]);

  /** True when `c` is in the C0-control encode set or in the fragment extras. */
  function isFragmentPercentEncode(c) {
    if (isC0ControlPercentEncode(c)) {
      return true;
    }
    return extraFragmentPercentEncodeSet.has(c);
  }
  // Code points encoded in paths on top of the fragment set: "#", "?", "{", "}"
  const extraPathPercentEncodeSet = new Set([35, 63, 123, 125]);

  /** True when `c` is in the fragment encode set or in the path extras. */
  function isPathPercentEncode(c) {
    if (isFragmentPercentEncode(c)) {
      return true;
    }
    return extraPathPercentEncodeSet.has(c);
  }
  /**
   * Returns the string for code point `c`, percent-encoded through
   * `utf8PercentEncode` when `encodeSetPredicate(c)` is truthy and unchanged
   * otherwise.
   */
  function percentEncodeChar(c, encodeSetPredicate) {
    const cStr = String.fromCodePoint(c);
    return encodeSetPredicate(c) ? utf8PercentEncode(cStr) : cStr;
  }
  /**
   * Parses one dotted-quad component. A leading "0x"/"0X" selects base 16, any
   * other leading "0" (on inputs of two or more characters) selects base 8,
   * otherwise base 10. Returns 0 when nothing is left after the prefix,
   * `failure` when a character is not a digit of the chosen base, and the
   * parsed number otherwise.
   */
  function parseIPv4Number(input) {
    let radix = 10;
    let digits = input;

    if (input.length >= 2 && input.charAt(0) === "0") {
      if (input.charAt(1).toLowerCase() === "x") {
        radix = 16;
        digits = input.substring(2);
      } else {
        radix = 8;
        digits = input.substring(1);
      }
    }

    if (digits === "") {
      return 0;
    }

    let invalidDigit;
    switch (radix) {
      case 16:
        invalidDigit = /[^0-9A-Fa-f]/;
        break;
      case 10:
        invalidDigit = /[^0-9]/;
        break;
      default:
        invalidDigit = /[^0-7]/;
    }

    if (invalidDigit.test(digits)) {
      return failure;
    }
    return parseInt(digits, radix);
  }
  /**
   * Tries to read `input` as a dotted IPv4 address.
   *
   * Returns `input` unchanged when it does not have the shape of an IPv4
   * address (more than four parts, an empty part, or a part that
   * `parseIPv4Number` rejects), `failure` when the parts are numeric but out of
   * range, and the address as a single number otherwise.
   */
  function parseIPv4(input) {
    const parts = input.split(".");
    // A single trailing dot is tolerated.
    if (parts.length > 1 && parts[parts.length - 1] === "") {
      parts.pop();
    }

    if (parts.length > 4) {
      return input;
    }

    const numbers = [];
    for (const part of parts) {
      if (part === "") {
        return input;
      }
      const parsed = parseIPv4Number(part);
      if (parsed === failure) {
        return input;
      }
      numbers.push(parsed);
    }

    const lastIndex = numbers.length - 1;
    for (let i = 0; i < lastIndex; ++i) {
      if (numbers[i] > 255) {
        return failure;
      }
    }
    // The last number fills all remaining bytes of the 32-bit address.
    if (numbers[lastIndex] >= Math.pow(256, 5 - numbers.length)) {
      return failure;
    }

    const last = numbers.pop();
    return numbers.reduce((sum, n, position) => sum + n * Math.pow(256, 3 - position), last);
  }
  /**
   * Formats a numeric IPv4 address as four dot-separated decimal parts, taking
   * `address % 256` for the last part and dividing by 256 for each earlier one.
   */
  function serializeIPv4(address) {
    const octets = [];
    let remaining = address;
    for (let i = 0; i < 4; ++i) {
      octets.unshift(String(remaining % 256));
      remaining = Math.floor(remaining / 256);
    }
    return octets.join(".");
  }
  /**
   * Parses the text between the brackets of an IPv6 host.
   *
   * @param {string} input - Address text without the surrounding "[" and "]".
   * @returns {number[]|symbol} An array of eight pieces, or `failure` when the
   *   input is not accepted.
   */
  function parseIPv6(input) {
    const address = [0, 0, 0, 0, 0, 0, 0, 0];
    let pieceIndex = 0;
    // Index of the piece at which a "::" compression was seen, if any.
    let compress = null;
    let pointer = 0;

    const codePoints = punycode.ucs2.decode(input);

    // A leading ":" (58) is only valid as the first half of "::".
    if (codePoints[pointer] === 58) {
      if (codePoints[pointer + 1] !== 58) {
        return failure;
      }

      pointer += 2;
      ++pieceIndex;
      compress = pieceIndex;
    }

    while (pointer < codePoints.length) {
      if (pieceIndex === 8) {
        return failure;
      }

      if (codePoints[pointer] === 58) {
        // A second compression marker is not allowed.
        if (compress !== null) {
          return failure;
        }
        ++pointer;
        ++pieceIndex;
        compress = pieceIndex;
        continue;
      }

      // Read up to four hex digits into `value`.
      let value = 0;
      let length = 0;

      while (length < 4 && infra.isASCIIHex(codePoints[pointer])) {
        value = value * 0x10 + parseInt(at(codePoints, pointer), 16);
        ++pointer;
        ++length;
      }

      if (codePoints[pointer] === 46) {
        // A "." (46) means the digits just read start an embedded dotted-decimal
        // tail; rewind and re-read them as decimal numbers.
        if (length === 0) {
          return failure;
        }

        pointer -= length;

        // The tail occupies two pieces, so it cannot start past piece 6.
        if (pieceIndex > 6) {
          return failure;
        }

        let numbersSeen = 0;

        while (codePoints[pointer] !== undefined) {
          let ipv4Piece = null;

          if (numbersSeen > 0) {
            if (codePoints[pointer] === 46 && numbersSeen < 4) {
              ++pointer;
            } else {
              return failure;
            }
          }

          if (!infra.isASCIIDigit(codePoints[pointer])) {
            return failure;
          }

          while (infra.isASCIIDigit(codePoints[pointer])) {
            const number = parseInt(at(codePoints, pointer), 10);
            if (ipv4Piece === null) {
              ipv4Piece = number;
            } else if (ipv4Piece === 0) {
              // Leading zeros are rejected.
              return failure;
            } else {
              ipv4Piece = ipv4Piece * 10 + number;
            }
            if (ipv4Piece > 255) {
              return failure;
            }
            ++pointer;
          }

          // Two decimal numbers are packed into each 16-bit piece.
          address[pieceIndex] = address[pieceIndex] * 0x100 + ipv4Piece;

          ++numbersSeen;

          if (numbersSeen === 2 || numbersSeen === 4) {
            ++pieceIndex;
          }
        }

        if (numbersSeen !== 4) {
          return failure;
        }

        break;
      } else if (codePoints[pointer] === 58) {
        ++pointer;
        // A single trailing ":" is invalid.
        if (codePoints[pointer] === undefined) {
          return failure;
        }
      } else if (codePoints[pointer] !== undefined) {
        return failure;
      }

      address[pieceIndex] = value;
      ++pieceIndex;
    }

    if (compress !== null) {
      // Move the pieces read after "::" to the end of the address, leaving the
      // zeros in the gap.
      let swaps = pieceIndex - compress;
      pieceIndex = 7;
      while (pieceIndex !== 0 && swaps > 0) {
        const temp = address[compress + swaps - 1];
        address[compress + swaps - 1] = address[pieceIndex];
        address[pieceIndex] = temp;
        --pieceIndex;
        --swaps;
      }
    } else if (pieceIndex !== 8) {
      // Without compression all eight pieces must have been supplied.
      return failure;
    }

    return address;
  }
  /**
   * Serializes an eight-piece address as colon-separated lowercase hex,
   * replacing the run reported by `findLongestZeroSequence` (if any) with "::".
   */
  function serializeIPv6(address) {
    const { idx: compress } = findLongestZeroSequence(address);
    let output = "";
    let skippingZeros = false;

    for (let pieceIndex = 0; pieceIndex <= 7; ++pieceIndex) {
      const piece = address[pieceIndex];

      if (skippingZeros) {
        if (piece === 0) {
          continue;
        }
        skippingZeros = false;
      }

      if (compress === pieceIndex) {
        // At the very start both colons are needed; elsewhere the previous
        // piece already contributed one.
        output += pieceIndex === 0 ? "::" : ":";
        skippingZeros = true;
        continue;
      }

      output += piece.toString(16);
      if (pieceIndex !== 7) {
        output += ":";
      }
    }

    return output;
  }
  /**
   * Parses a host string.
   *
   * Bracketed input goes to `parseIPv6`; otherwise, when `isNotSpecialArg` is
   * set, to `parseOpaqueHost`. Remaining input is percent-decoded, run through
   * `domainToASCII`, checked for forbidden code points and finally offered to
   * `parseIPv4`. Returns `failure`, a number (IPv4), an array (IPv6) or a
   * string.
   */
  function parseHost(input, isNotSpecialArg = false) {
    if (input[0] === "[") {
      const isClosed = input[input.length - 1] === "]";
      return isClosed ? parseIPv6(input.substring(1, input.length - 1)) : failure;
    }

    if (isNotSpecialArg) {
      return parseOpaqueHost(input);
    }

    const domain = percentDecode(Buffer.from(input)).toString();
    const asciiDomain = domainToASCII(domain);
    if (asciiDomain === failure || containsForbiddenHostCodePoint(asciiDomain)) {
      return failure;
    }

    const ipv4Host = parseIPv4(asciiDomain);
    const isIPv4Result = typeof ipv4Host === "number" || ipv4Host === failure;
    return isIPv4Result ? ipv4Host : asciiDomain;
  }
  /**
   * Returns `failure` when `input` contains a forbidden host code point other
   * than "%"; otherwise returns `input` with every code point passed through
   * `percentEncodeChar` using the C0-control encode set.
   */
  function parseOpaqueHost(input) {
    if (containsForbiddenHostCodePointExcludingPercent(input)) {
      return failure;
    }

    return punycode.ucs2.decode(input)
      .map(codePoint => percentEncodeChar(codePoint, isC0ControlPercentEncode))
      .join("");
  }
  /**
   * Finds the first longest run of zeros in `arr` that is longer than one
   * element. Returns `{ idx, len }`; when no such run exists `idx` is `null`
   * and `len` is 1.
   */
  function findLongestZeroSequence(arr) {
    let bestStart = null;
    let bestLen = 1; // runs must be strictly longer than this to count
    let runStart = null;
    let runLen = 0;

    // Records the current run if it beats the best one seen so far.
    const commitRun = () => {
      if (runLen > bestLen) {
        bestStart = runStart;
        bestLen = runLen;
      }
    };

    for (let i = 0; i < arr.length; ++i) {
      if (arr[i] === 0) {
        if (runStart === null) {
          runStart = i;
        }
        ++runLen;
      } else {
        commitRun();
        runStart = null;
        runLen = 0;
      }
    }

    // A run that reaches the end of the array has not been committed yet.
    commitRun();

    return { idx: bestStart, len: bestLen };
  }
  /**
   * Serializes a parsed host: arrays go through `serializeIPv6` and are wrapped
   * in brackets, numbers go through `serializeIPv4`, anything else is returned
   * as is.
   */
  function serializeHost(host) {
    if (host instanceof Array) {
      return `[${serializeIPv6(host)}]`;
    }
    return typeof host === "number" ? serializeIPv4(host) : host;
  }
  /**
   * Runs `tr46.toASCII` on `domain`. `beStrict` toggles both
   * `useSTD3ASCIIRules` and `verifyDNSLength`. A `null` result is mapped to
   * `failure`.
   */
  function domainToASCII(domain, beStrict = false) {
    const options = {
      checkBidi: true,
      checkHyphens: false,
      checkJoiners: true,
      useSTD3ASCIIRules: beStrict,
      verifyDNSLength: beStrict
    };
    const result = tr46.toASCII(domain, options);
    return result === null ? failure : result;
  }
  /** Strips leading and trailing characters in the range U+0000 to U+0020. */
  function trimControlChars(url) {
    const leadingOrTrailing = /^[\u0000-\u001F\u0020]+|[\u0000-\u001F\u0020]+$/g;
    return url.replace(leadingOrTrailing, "");
  }
  /** Removes every tab, line feed and carriage return from `url`. */
  function trimTabAndNewline(url) {
    const tabOrNewline = /\u0009|\u000A|\u000D/g;
    return url.replace(tabOrNewline, "");
  }
  /**
   * Removes the last segment of `url.path` in place. Nothing happens for an
   * empty path, or for a `file` URL whose only segment is a normalized Windows
   * drive letter.
   */
  function shortenPath(url) {
    const segments = url.path;
    if (segments.length === 0) {
      return;
    }

    const isLoneDriveLetter = url.scheme === "file" && segments.length === 1 &&
      isNormalizedWindowsDriveLetter(segments[0]);
    if (!isLoneDriveLetter) {
      segments.pop();
    }
  }
  /** True unless both `url.username` and `url.password` are the empty string. */
  function includesCredentials(url) {
    const { username, password } = url;
    return !(username === "" && password === "");
  }
  /**
   * Truthy when `url` has a null or empty host, is flagged
   * `cannotBeABaseURL`, or uses the `file` scheme.
   */
  function cannotHaveAUsernamePasswordPort(url) {
    if (url.host === null || url.host === "") {
      return true;
    }
    return url.cannotBeABaseURL || url.scheme === "file";
  }
  /** True when `string` is exactly one ASCII letter followed by ":". */
  function isNormalizedWindowsDriveLetter(string) {
    const driveLetterPattern = /^[A-Za-z]:$/;
    return driveLetterPattern.test(string);
  }
  /**
   * Runs the URL state machine over `input`.
   *
   * When no `url` record is supplied a fresh one is created and leading and
   * trailing control characters or spaces are trimmed from the input. Tabs and
   * newlines are always removed. The input is then decoded to code points and
   * fed, one at a time plus one end-of-input step, to the handler named
   * "parse <state>". Results are left on `this.url`, `this.failure` and
   * `this.parseError`.
   */
  function URLStateMachine(input, base, encodingOverride, url, stateOverride) {
    this.pointer = 0;
    this.input = input;
    this.base = base || null;
    this.encodingOverride = encodingOverride || "utf-8";
    this.stateOverride = stateOverride;
    this.url = url;
    this.failure = false;
    this.parseError = false;

    if (!this.url) {
      this.url = {
        scheme: "",
        username: "",
        password: "",
        host: null,
        port: null,
        path: [],

        query: null,
        fragment: null,

        cannotBeABaseURL: false
      };

      const trimmed = trimControlChars(this.input);
      if (trimmed !== this.input) {
        this.parseError = true;
      }
      this.input = trimmed;
    }

    const stripped = trimTabAndNewline(this.input);
    if (stripped !== this.input) {
      this.parseError = true;
    }
    this.input = stripped;

    this.state = stateOverride || "scheme start";

    this.buffer = "";
    this.atFlag = false;
    this.arrFlag = false;
    this.passwordTokenSeenFlag = false;

    this.input = punycode.ucs2.decode(this.input);

    // `<=` on purpose: the final iteration passes `undefined` as end-of-input.
    while (this.pointer <= this.input.length) {
      const c = this.input[this.pointer];
      const cStr = isNaN(c) ? undefined : String.fromCodePoint(c);

      // Handlers may change this.state and move this.pointer.
      const ret = this[`parse ${this.state}`](c, cStr);
      if (!ret) {
        break; // the handler asked to stop
      }
      if (ret === failure) {
        this.failure = true;
        break;
      }
      ++this.pointer;
    }
  }
  // "scheme start" state: an ASCII letter begins a scheme; otherwise fall back
  // to "no scheme" (re-reading the same code point) or, under a state override,
  // fail.
  URLStateMachine.prototype["parse scheme start"] = function parseSchemeStart(c, cStr) {
    if (infra.isASCIIAlpha(c)) {
      this.buffer += cStr.toLowerCase();
      this.state = "scheme";
      return true;
    }

    if (this.stateOverride) {
      this.parseError = true;
      return failure;
    }

    this.state = "no scheme";
    --this.pointer;
    return true;
  };
  // "scheme" state: collects the lowercased scheme in this.buffer until ":" (58)
  // is seen, then stores it on the URL and chooses the next state. Returns true
  // to continue, false to stop the machine, or `failure`.
  URLStateMachine.prototype["parse scheme"] = function parseScheme(c, cStr) {
    // Scheme characters: ASCII alphanumerics, "+" (43), "-" (45) and "." (46).
    if (infra.isASCIIAlphanumeric(c) || c === 43 || c === 45 || c === 46) {
      this.buffer += cStr.toLowerCase();
    } else if (c === 58) {
      if (this.stateOverride) {
        // Under a state override the new scheme is silently rejected (the
        // machine stops without failure) when it would switch between special
        // and non-special, or conflict with the URL's credentials/port/host.
        if (isSpecial(this.url) && !isSpecialScheme(this.buffer)) {
          return false;
        }

        if (!isSpecial(this.url) && isSpecialScheme(this.buffer)) {
          return false;
        }

        if ((includesCredentials(this.url) || this.url.port !== null) && this.buffer === "file") {
          return false;
        }

        if (this.url.scheme === "file" && (this.url.host === "" || this.url.host === null)) {
          return false;
        }
      }
      this.url.scheme = this.buffer;
      if (this.stateOverride) {
        // Drop a port that equals the new scheme's default, then stop.
        if (this.url.port === defaultPort(this.url.scheme)) {
          this.url.port = null;
        }
        return false;
      }
      this.buffer = "";
      if (this.url.scheme === "file") {
        // "file:" not followed by "//" (47 47) is flagged as a parse error.
        if (this.input[this.pointer + 1] !== 47 || this.input[this.pointer + 2] !== 47) {
          this.parseError = true;
        }
        this.state = "file";
      } else if (isSpecial(this.url) && this.base !== null && this.base.scheme === this.url.scheme) {
        this.state = "special relative or authority";
      } else if (isSpecial(this.url)) {
        this.state = "special authority slashes";
      } else if (this.input[this.pointer + 1] === 47) {
        this.state = "path or authority";
        // Skip the "/" that was just looked at.
        ++this.pointer;
      } else {
        this.url.cannotBeABaseURL = true;
        this.url.path.push("");
        this.state = "cannot-be-a-base-URL path";
      }
    } else if (!this.stateOverride) {
      // Not a scheme after all: restart from the beginning of the input in the
      // "no scheme" state (-1 because the driver loop increments the pointer).
      this.buffer = "";
      this.state = "no scheme";
      this.pointer = -1;
    } else {
      this.parseError = true;
      return failure;
    }

    return true;
  };
  // "no scheme" state: the input has no scheme of its own, so it can only be
  // resolved against a base URL.
  URLStateMachine.prototype["parse no scheme"] = function parseNoScheme(c) {
    const { base } = this;
    if (base === null) {
      return failure;
    }

    if (base.cannotBeABaseURL) {
      // Only a bare fragment ("#", 35) can be applied to such a base.
      if (c !== 35) {
        return failure;
      }
      this.url.scheme = base.scheme;
      this.url.path = base.path.slice();
      this.url.query = base.query;
      this.url.fragment = "";
      this.url.cannotBeABaseURL = true;
      this.state = "fragment";
      return true;
    }

    // Re-read the current code point in the follow-up state.
    this.state = base.scheme === "file" ? "file" : "relative";
    --this.pointer;
    return true;
  };
  // "special relative or authority" state: "//" (47 47) introduces an
  // authority; anything else is a parse error and is handled as relative.
  URLStateMachine.prototype["parse special relative or authority"] = function parseSpecialRelativeOrAuthority(c) {
    const startsWithTwoSlashes = c === 47 && this.input[this.pointer + 1] === 47;

    if (startsWithTwoSlashes) {
      this.state = "special authority ignore slashes";
      ++this.pointer;
      return true;
    }

    this.parseError = true;
    this.state = "relative";
    --this.pointer;
    return true;
  };
  // "path or authority" state: "/" (47) selects the authority state; any other
  // code point is re-read in the path state.
  URLStateMachine.prototype["parse path or authority"] = function parsePathOrAuthority(c) {
    if (c === 47) {
      this.state = "authority";
      return true;
    }

    this.state = "path";
    --this.pointer;
    return true;
  };
  // "relative" state: resolves scheme-less input against this.base, copying as
  // much of the base as the first code point allows.
  URLStateMachine.prototype["parse relative"] = function parseRelative(c) {
    const { base, url } = this;
    url.scheme = base.scheme;

    // "/" (47), or "\" (92) for special schemes, defers to "relative slash".
    if (c === 47) {
      this.state = "relative slash";
      return true;
    }
    if (c === 92 && isSpecial(url)) {
      this.parseError = true;
      this.state = "relative slash";
      return true;
    }

    // Every remaining case inherits the base's authority.
    url.username = base.username;
    url.password = base.password;
    url.host = base.host;
    url.port = base.port;

    if (isNaN(c)) {
      // End of input: the base's path and query are kept as they are.
      url.path = base.path.slice();
      url.query = base.query;
    } else if (c === 63) {
      // "?": keep the path, start a new query.
      url.path = base.path.slice();
      url.query = "";
      this.state = "query";
    } else if (c === 35) {
      // "#": keep path and query, start a new fragment.
      url.path = base.path.slice();
      url.query = base.query;
      url.fragment = "";
      this.state = "fragment";
    } else {
      // Relative path: drop the base's last segment and re-read c as path.
      url.path = base.path.slice(0, base.path.length - 1);
      this.state = "path";
      --this.pointer;
    }

    return true;
  };
  // "relative slash" state: a second slash introduces an authority; otherwise
  // the base's authority is inherited and c is re-read as path.
  URLStateMachine.prototype["parse relative slash"] = function parseRelativeSlash(c) {
    const special = isSpecial(this.url);

    if (c === 92 && special) {
      // "\" (92) is tolerated for special schemes but flagged.
      this.parseError = true;
      this.state = "special authority ignore slashes";
    } else if (c === 47) {
      this.state = special ? "special authority ignore slashes" : "authority";
    } else {
      this.url.username = this.base.username;
      this.url.password = this.base.password;
      this.url.host = this.base.host;
      this.url.port = this.base.port;
      this.state = "path";
      --this.pointer;
    }

    return true;
  };
  // "special authority slashes" state: expects "//" (47 47). Either way the
  // next state is "special authority ignore slashes"; a missing "//" is a parse
  // error and the current code point is re-read.
  URLStateMachine.prototype["parse special authority slashes"] = function parseSpecialAuthoritySlashes(c) {
    this.state = "special authority ignore slashes";

    if (c === 47 && this.input[this.pointer + 1] === 47) {
      ++this.pointer;
    } else {
      this.parseError = true;
      --this.pointer;
    }

    return true;
  };
  // "special authority ignore slashes" state: skips extra "/" (47) and "\" (92),
  // flagging each as a parse error; the first other code point is re-read in
  // the authority state.
  URLStateMachine.prototype["parse special authority ignore slashes"] = function parseSpecialAuthorityIgnoreSlashes(c) {
    if (c === 47 || c === 92) {
      this.parseError = true;
      return true;
    }

    this.state = "authority";
    --this.pointer;
    return true;
  };
  // "authority" state: buffers code points until "@" (64) or the end of the
  // authority. On "@" the buffer is split into username and password; at the
  // end of the authority the pointer is rewound so the "host" state re-reads
  // whatever followed the last "@".
  //
  // Returns true to continue or `failure` when credentials are followed by an
  // empty host.
  URLStateMachine.prototype["parse authority"] = function parseAuthority(c, cStr) {
    if (c === 64) {
      this.parseError = true;
      // A previous "@" turned out to be part of the userinfo; keep it encoded.
      if (this.atFlag) {
        this.buffer = "%40" + this.buffer;
      }
      this.atFlag = true;

      // Walk the buffer by code point. String iteration is used because
      // `codePointAt(i)` takes a UTF-16 unit index, so indexing it with a
      // code-point counter misreads everything after an astral character.
      for (const symbol of this.buffer) {
        const codePoint = symbol.codePointAt(0);

        // The first ":" (58) separates username from password and is dropped.
        if (codePoint === 58 && !this.passwordTokenSeenFlag) {
          this.passwordTokenSeenFlag = true;
          continue;
        }
        const encodedCodePoints = percentEncodeChar(codePoint, isUserinfoPercentEncode);
        if (this.passwordTokenSeenFlag) {
          this.url.password += encodedCodePoints;
        } else {
          this.url.username += encodedCodePoints;
        }
      }
      this.buffer = "";
    } else if (isNaN(c) || c === 47 || c === 63 || c === 35 ||
               (isSpecial(this.url) && c === 92)) {
      // End of input, "/", "?", "#", or "\" for special schemes.
      if (this.atFlag && this.buffer === "") {
        this.parseError = true;
        return failure;
      }
      // The driver pointer counts code points, hence countSymbols (not length).
      this.pointer -= countSymbols(this.buffer) + 1;
      this.buffer = "";
      this.state = "host";
    } else {
      this.buffer += cStr;
    }

    return true;
  };
  // "host" / "hostname" states (one handler registered under both names):
  // buffers the host text and, once it ends, parses it with parseHost and
  // stores the result on the URL. Returns true to continue, false to stop the
  // machine, or `failure`.
  URLStateMachine.prototype["parse hostname"] =
  URLStateMachine.prototype["parse host"] = function parseHostName(c, cStr) {
    if (this.stateOverride && this.url.scheme === "file") {
      // File URLs have their own host handling; re-read c there.
      --this.pointer;
      this.state = "file host";
    } else if (c === 58 && !this.arrFlag) {
      // ":" (58) outside of "[...]" ends the host and starts the port.
      if (this.buffer === "") {
        this.parseError = true;
        return failure;
      }

      const host = parseHost(this.buffer, isNotSpecial(this.url));
      if (host === failure) {
        return failure;
      }

      this.url.host = host;
      this.buffer = "";
      this.state = "port";
      if (this.stateOverride === "hostname") {
        return false;
      }
    } else if (isNaN(c) || c === 47 || c === 63 || c === 35 ||
               (isSpecial(this.url) && c === 92)) {
      // End of input, "/", "?", "#", or "\" for special schemes: the host is
      // complete and c will be re-read by the next state.
      --this.pointer;
      if (isSpecial(this.url) && this.buffer === "") {
        this.parseError = true;
        return failure;
      } else if (this.stateOverride && this.buffer === "" &&
                 (includesCredentials(this.url) || this.url.port !== null)) {
        // Under a state override, an empty host is not applied to a URL that
        // has credentials or a port; the machine stops without failure.
        this.parseError = true;
        return false;
      }

      const host = parseHost(this.buffer, isNotSpecial(this.url));
      if (host === failure) {
        return failure;
      }

      this.url.host = host;
      this.buffer = "";
      this.state = "path start";
      if (this.stateOverride) {
        return false;
      }
    } else {
      // Track whether we are inside "[" (91) ... "]" (93) so that colons there
      // are not taken as the port separator.
      if (c === 91) {
        this.arrFlag = true;
      } else if (c === 93) {
        this.arrFlag = false;
      }
      this.buffer += cStr;
    }

    return true;
  };
  // "port" state: buffers ASCII digits; when the port ends, validates it and
  // stores it on the URL (or `null` when it equals the scheme's default port).
  //
  // Returns true to continue, false to stop the machine (state override), or
  // `failure` for a port above 65535 or a non-digit inside the port.
  URLStateMachine.prototype["parse port"] = function parsePort(c, cStr) {
    if (infra.isASCIIDigit(c)) {
      this.buffer += cStr;
    } else if (isNaN(c) || c === 47 || c === 63 || c === 35 ||
               (isSpecial(this.url) && c === 92) ||
               this.stateOverride) {
      // End of input, "/", "?", "#", "\" for special schemes, or any code point
      // at all when a state override is active.
      if (this.buffer !== "") {
        // The buffer holds only ASCII digits; the radix is given explicitly so
        // the parse never depends on parseInt's prefix detection.
        const port = parseInt(this.buffer, 10);
        if (port > Math.pow(2, 16) - 1) {
          this.parseError = true;
          return failure;
        }
        this.url.port = port === defaultPort(this.url.scheme) ? null : port;
        this.buffer = "";
      }
      if (this.stateOverride) {
        return false;
      }
      // Re-read c in the "path start" state.
      this.state = "path start";
      --this.pointer;
    } else {
      this.parseError = true;
      return failure;
    }

    return true;
  };
  // Code points allowed directly after a drive letter: "/" "\" "?" "#"
  const fileOtherwiseCodePoints = new Set([47, 92, 63, 35]);

  /**
   * Checks whether the code points of `input` from `pointer` onward begin with
   * a Windows drive letter that is either the whole remainder or is followed by
   * one of `fileOtherwiseCodePoints`.
   */
  function startsWithWindowsDriveLetter(input, pointer) {
    const remaining = input.length - pointer;
    if (remaining < 2) {
      return false;
    }

    const followedByDelimiter = remaining === 2 || fileOtherwiseCodePoints.has(input[pointer + 2]);
    return isWindowsDriveLetterCodePoints(input[pointer], input[pointer + 1]) && followedByDelimiter;
  }
  // "file" state: entry point for file URLs. Slashes lead to "file slash";
  // otherwise the URL is resolved against a file base (if there is one) or
  // handed to the path state.
  URLStateMachine.prototype["parse file"] = function parseFile(c) {
    this.url.scheme = "file";

    // "/" (47) or "\" (92); the backslash is accepted but flagged.
    if (c === 47 || c === 92) {
      if (c === 92) {
        this.parseError = true;
      }
      this.state = "file slash";
    } else if (this.base !== null && this.base.scheme === "file") {
      if (isNaN(c)) {
        // End of input: take host, path and query from the base.
        this.url.host = this.base.host;
        this.url.path = this.base.path.slice();
        this.url.query = this.base.query;
      } else if (c === 63) {
        // "?": keep host and path, start a new query.
        this.url.host = this.base.host;
        this.url.path = this.base.path.slice();
        this.url.query = "";
        this.state = "query";
      } else if (c === 35) {
        // "#": keep host, path and query, start a new fragment.
        this.url.host = this.base.host;
        this.url.path = this.base.path.slice();
        this.url.query = this.base.query;
        this.url.fragment = "";
        this.state = "fragment";
      } else {
        // Relative path. When the remaining input starts with a Windows drive
        // letter nothing is inherited from the base and a parse error is set.
        if (!startsWithWindowsDriveLetter(this.input, this.pointer)) {
          this.url.host = this.base.host;
          this.url.path = this.base.path.slice();
          shortenPath(this.url);
        } else {
          this.parseError = true;
        }

        // Re-read c in the path state.
        this.state = "path";
        --this.pointer;
      }
    } else {
      this.state = "path";
      --this.pointer;
    }

    return true;
  };
  // "file slash" state: a second slash introduces a file host; otherwise the
  // path starts here, optionally inheriting the drive letter or host of a file
  // base.
  URLStateMachine.prototype["parse file slash"] = function parseFileSlash(c) {
    if (c === 47 || c === 92) {
      // "\" (92) is accepted but flagged.
      if (c === 92) {
        this.parseError = true;
      }
      this.state = "file host";
      return true;
    }

    const { base } = this;
    const inheritsFromFileBase = base !== null && base.scheme === "file" &&
      !startsWithWindowsDriveLetter(this.input, this.pointer);

    if (inheritsFromFileBase) {
      const firstSegment = base.path[0];
      if (isNormalizedWindowsDriveLetterString(firstSegment)) {
        this.url.path.push(firstSegment);
      } else {
        this.url.host = base.host;
      }
    }

    // Re-read c in the path state.
    this.state = "path";
    --this.pointer;
    return true;
  };
  // "file host" state: buffers the host of a file URL until end of input, "/",
  // "\", "?" or "#", then decides whether the buffer was a drive letter, an
  // empty host or a real host. Returns true to continue, false to stop the
  // machine (state override), or `failure`.
  URLStateMachine.prototype["parse file host"] = function parseFileHost(c, cStr) {
    if (isNaN(c) || c === 47 || c === 92 || c === 63 || c === 35) {
      // The terminating code point is re-read by the next state.
      --this.pointer;
      if (!this.stateOverride && isWindowsDriveLetterString(this.buffer)) {
        // What looked like a host is a drive letter: the buffer is left intact
        // and handed to the path state.
        this.parseError = true;
        this.state = "path";
      } else if (this.buffer === "") {
        this.url.host = "";
        if (this.stateOverride) {
          return false;
        }
        this.state = "path start";
      } else {
        let host = parseHost(this.buffer, isNotSpecial(this.url));
        if (host === failure) {
          return failure;
        }
        // "localhost" is stored as the empty host.
        if (host === "localhost") {
          host = "";
        }
        this.url.host = host;

        if (this.stateOverride) {
          return false;
        }

        this.buffer = "";
        this.state = "path start";
      }
    } else {
      this.buffer += cStr;
    }

    return true;
  };
  // "path start" state: consumes the slash that introduces the path (if there
  // is one) and selects the path, query or fragment state.
  URLStateMachine.prototype["parse path start"] = function parsePathStart(c) {
    if (isSpecial(this.url)) {
      // "\" (92) is accepted like "/" (47) for special schemes but flagged.
      if (c === 92) {
        this.parseError = true;
      }
      this.state = "path";
      if (c !== 47 && c !== 92) {
        --this.pointer;
      }
      return true;
    }

    if (!this.stateOverride && (c === 63 || c === 35)) {
      if (c === 63) {
        this.url.query = "";
        this.state = "query";
      } else {
        this.url.fragment = "";
        this.state = "fragment";
      }
      return true;
    }

    if (c !== undefined) {
      this.state = "path";
      if (c !== 47) {
        --this.pointer;
      }
    }

    return true;
  };
  // "path" state: buffers one path segment (percent-encoding as needed) and, at
  // a segment boundary, applies it to this.url.path with "." and ".." handling.
  URLStateMachine.prototype["parse path"] = function parsePath(c) {
    // Segment boundary: end of input, "/" (47), "\" (92) for special schemes,
    // or "?" (63) / "#" (35) when no state override is active.
    if (isNaN(c) || c === 47 || (isSpecial(this.url) && c === 92) ||
        (!this.stateOverride && (c === 63 || c === 35))) {
      if (isSpecial(this.url) && c === 92) {
        this.parseError = true;
      }

      if (isDoubleDot(this.buffer)) {
        shortenPath(this.url);
        // A ".." that is not followed by a slash leaves an empty last segment.
        if (c !== 47 && !(isSpecial(this.url) && c === 92)) {
          this.url.path.push("");
        }
      } else if (isSingleDot(this.buffer) && c !== 47 &&
                 !(isSpecial(this.url) && c === 92)) {
        // A final "." also ends the path with an empty segment.
        this.url.path.push("");
      } else if (!isSingleDot(this.buffer)) {
        if (this.url.scheme === "file" && this.url.path.length === 0 && isWindowsDriveLetterString(this.buffer)) {
          // A drive letter as the first segment of a file URL: clear any host
          // and normalize "X|" to "X:".
          if (this.url.host !== "" && this.url.host !== null) {
            this.parseError = true;
            this.url.host = "";
          }
          this.buffer = this.buffer[0] + ":";
        }
        this.url.path.push(this.buffer);
      }
      this.buffer = "";
      // At the end of a file URL's path, drop leading empty segments (keeping
      // at least one segment).
      if (this.url.scheme === "file" && (c === undefined || c === 63 || c === 35)) {
        while (this.url.path.length > 1 && this.url.path[0] === "") {
          this.parseError = true;
          this.url.path.shift();
        }
      }
      if (c === 63) {
        this.url.query = "";
        this.state = "query";
      }
      if (c === 35) {
        this.url.fragment = "";
        this.state = "fragment";
      }
    } else {
      // TODO: If c is not a URL code point and not "%", parse error.

      // "%" (37) not followed by two hex digits is flagged but kept.
      if (c === 37 &&
        (!infra.isASCIIHex(this.input[this.pointer + 1]) ||
          !infra.isASCIIHex(this.input[this.pointer + 2]))) {
        this.parseError = true;
      }

      this.buffer += percentEncodeChar(c, isPathPercentEncode);
    }

    return true;
  };
  /**
   * "cannot-be-a-base-URL path" state: appends each code point, percent-encoded
   * with the C0-control set, to the single path entry `url.path[0]`, until
   * "?" (63) or "#" (35) starts the query or fragment.
   *
   * @param {number} c - Current code point; a non-numeric value is ignored.
   * @returns {boolean} Always `true`.
   */
  URLStateMachine.prototype["parse cannot-be-a-base-URL path"] = function parseCannotBeABaseURLPath(c) {
    if (c === 63) {
      this.url.query = "";
      this.state = "query";
      return true;
    }
    if (c === 35) {
      this.url.fragment = "";
      this.state = "fragment";
      return true;
    }
    if (isNaN(c)) {
      // Nothing to append.
      return true;
    }

    // TODO: Add: not a URL code point
    // Until that check exists, every code point other than "%" (37) is flagged;
    // "%" itself is flagged only when not followed by two ASCII hex digits.
    if (c !== 37 ||
        !infra.isASCIIHex(this.input[this.pointer + 1]) ||
        !infra.isASCIIHex(this.input[this.pointer + 2])) {
      this.parseError = true;
    }

    this.url.path[0] += percentEncodeChar(c, isC0ControlPercentEncode);
    return true;
  };
  /**
   * "query" state: buffers the raw query text and, once the query ends,
   * appends its byte-wise percent-encoded form to `url.query`.
   *
   * The query ends at end of input, or at "#" (35) when no state override is
   * set.
   *
   * @param {number} c - Current code point.
   * @param {string} cStr - Text appended to the buffer for `c` (presumably the
   *   string form of `c`).
   * @returns {boolean} Always `true`.
   */
  URLStateMachine.prototype["parse query"] = function parseQuery(c, cStr) {
    const queryEnds = isNaN(c) || (!this.stateOverride && c === 35);

    if (!queryEnds) {
      // TODO: If c is not a URL code point and not "%", parse error.
      const isMalformedEscape = c === 37 &&
        (!infra.isASCIIHex(this.input[this.pointer + 1]) ||
          !infra.isASCIIHex(this.input[this.pointer + 2]));
      if (isMalformedEscape) {
        this.parseError = true;
      }
      this.buffer += cStr;
      return true;
    }

    const special = isSpecial(this.url);
    if (!special || this.url.scheme === "ws" || this.url.scheme === "wss") {
      this.encodingOverride = "utf-8";
    }

    // TODO: Use encoding override instead
    for (const byte of Buffer.from(this.buffer)) {
      // Encoded: bytes below 0x21 or above 0x7E, plus '"' (0x22), "#" (0x23),
      // "<" (0x3C), ">" (0x3E), and "'" (0x27) for special URLs.
      const mustEncode = byte < 0x21 ||
        byte > 0x7E ||
        byte === 0x22 || byte === 0x23 || byte === 0x3C || byte === 0x3E ||
        (byte === 0x27 && special);
      this.url.query += mustEncode ? percentEncode(byte) : String.fromCodePoint(byte);
    }
    this.buffer = "";

    if (c === 35) {
      this.url.fragment = "";
      this.state = "fragment";
    }
    return true;
  };
  /**
   * "fragment" state: appends each code point, percent-encoded with the
   * fragment set, to `url.fragment`.
   *
   * @param {number} c - Current code point; a non-numeric value is ignored.
   * @returns {boolean} Always `true`.
   */
  URLStateMachine.prototype["parse fragment"] = function parseFragment(c) {
    if (isNaN(c)) {
      // do nothing
      return true;
    }

    if (c === 0x0) {
      // A NUL code point is flagged and not appended.
      this.parseError = true;
      return true;
    }

    // TODO: If c is not a URL code point and not "%", parse error.
    const isMalformedEscape = c === 37 &&
      (!infra.isASCIIHex(this.input[this.pointer + 1]) ||
        !infra.isASCIIHex(this.input[this.pointer + 2]));
    if (isMalformedEscape) {
      this.parseError = true;
    }

    this.url.fragment += percentEncodeChar(c, isFragmentPercentEncode);
    return true;
  };
  /**
   * Serializes a URL record into its string form.
   *
   * @param {object} url - URL record; reads `scheme`, `host`, `username`,
   *   `password`, `port`, `path`, `cannotBeABaseURL`, `query` and `fragment`.
   * @param {boolean} [excludeFragment] - When truthy, the "#fragment" part is
   *   left out.
   * @returns {string} The serialized URL.
   */
  function serializeURL(url, excludeFragment) {
    let serialized = `${url.scheme}:`;

    if (url.host === null) {
      // file: URLs keep the "//" marker even without a host.
      if (url.scheme === "file") {
        serialized += "//";
      }
    } else {
      serialized += "//";

      const hasCredentials = url.username !== "" || url.password !== "";
      if (hasCredentials) {
        serialized += url.username;
        if (url.password !== "") {
          serialized += `:${url.password}`;
        }
        serialized += "@";
      }

      serialized += serializeHost(url.host);
      if (url.port !== null) {
        serialized += `:${url.port}`;
      }
    }

    if (url.cannotBeABaseURL) {
      // The first path entry is emitted as-is, without a leading slash.
      serialized += url.path[0];
    } else {
      for (const segment of url.path) {
        serialized += `/${segment}`;
      }
    }

    if (url.query !== null) {
      serialized += `?${url.query}`;
    }

    if (!excludeFragment && url.fragment !== null) {
      serialized += `#${url.fragment}`;
    }

    return serialized;
  }
  /**
   * Serializes an origin tuple as "scheme://host" with an optional ":port".
   *
   * @param {{scheme: string, host: *, port: (number|null)}} tuple - Origin
   *   tuple; the port is omitted from the output when it is `null`.
   * @returns {string} The serialized origin.
   */
  function serializeOrigin(tuple) {
    const { scheme, host, port } = tuple;
    const hostText = serializeHost(host);
    const portSuffix = port === null ? "" : `:${port}`;
    return `${scheme}://${hostText}${portSuffix}`;
  }
  module.exports.serializeURL = serializeURL;

  /**
   * Serializes the origin of a URL record.
   *
   * @param {object} url - URL record; reads `scheme`, `host`, `port` and, for
   *   blob: URLs, `path[0]`.
   * @returns {string} "scheme://host[:port]" for tuple origins, otherwise the
   *   string "null" (the serialization of an opaque origin).
   */
  module.exports.serializeURLOrigin = function (url) {
    // https://url.spec.whatwg.org/#concept-url-origin
    const { scheme } = url;

    if (scheme === "blob") {
      // The origin of a blob: URL is that of the URL stored in its path. Any
      // error raised while resolving it falls back to the opaque origin.
      try {
        const innerURL = module.exports.parseURL(url.path[0]);
        return module.exports.serializeURLOrigin(innerURL);
      } catch (e) {
        // serializing an opaque origin returns "null"
        return "null";
      }
    }

    const tupleOriginSchemes = ["ftp", "http", "https", "ws", "wss"];
    if (tupleOriginSchemes.includes(scheme)) {
      return serializeOrigin({
        scheme,
        host: url.host,
        port: url.port
      });
    }

    // For "file" the spec says:
    // > Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin.
    // Browsers tested so far:
    // - Chrome says "file://", but treats file: URLs as cross-origin for most (all?) purposes; see e.g.
    //   https://bugs.chromium.org/p/chromium/issues/detail?id=37586
    // - Firefox says "null", but treats file: URLs as same-origin sometimes based on directory stuff; see
    //   https://developer.mozilla.org/en-US/docs/Archive/Misc_top_level/Same-origin_policy_for_file:_URIs
    // "file" and every other scheme therefore share the opaque-origin result;
    // serializing an opaque origin returns "null".
    return "null";
  };
  /**
   * Runs the URL state machine over `input`.
   *
   * @param {string} input - Text to parse.
   * @param {object} [options] - Optional settings; `baseURL`,
   *   `encodingOverride`, `url` and `stateOverride` are forwarded to
   *   `URLStateMachine` in that order.
   * @returns {object|null} The state machine's `url`, or `null` when the
   *   machine reports `failure`.
   */
  module.exports.basicURLParse = function (input, options) {
    const settings = options === undefined ? {} : options;
    const { baseURL, encodingOverride, url, stateOverride } = settings;

    const machine = new URLStateMachine(input, baseURL, encodingOverride, url, stateOverride);
    return machine.failure ? null : machine.url;
  };
  /**
   * Replaces `url.username` with the userinfo-percent-encoded form of
   * `username`.
   *
   * @param {object} url - URL record, mutated in place.
   * @param {string} username - New username, not yet encoded.
   */
  module.exports.setTheUsername = function (url, username) {
    // Reset first; the encoded text is then appended piece by piece.
    url.username = "";
    for (const codePoint of punycode.ucs2.decode(username)) {
      url.username += percentEncodeChar(codePoint, isUserinfoPercentEncode);
    }
  };
  /**
   * Replaces `url.password` with the userinfo-percent-encoded form of
   * `password`.
   *
   * @param {object} url - URL record, mutated in place.
   * @param {string} password - New password, not yet encoded.
   */
  module.exports.setThePassword = function (url, password) {
    // Reset first; the encoded text is then appended piece by piece.
    url.password = "";
    for (const codePoint of punycode.ucs2.decode(password)) {
      url.password += percentEncodeChar(codePoint, isUserinfoPercentEncode);
    }
  };
  // Re-export helpers defined elsewhere in this file.
  module.exports.serializeHost = serializeHost;
  module.exports.cannotHaveAUsernamePasswordPort = cannotHaveAUsernamePasswordPort;

  /**
   * Serializes an integer by converting it with `String`.
   *
   * @param {number} integer - Value to serialize.
   * @returns {string} The result of `String(integer)`.
   */
  module.exports.serializeInteger = function (integer) {
    const serialized = String(integer);
    return serialized;
  };
  /**
   * Parses `input` into a URL record.
   *
   * @param {string} input - Text to parse.
   * @param {object} [options] - Optional settings; only `baseURL` and
   *   `encodingOverride` are forwarded.
   * @returns {object|null} Whatever `basicURLParse` returns for the input.
   */
  module.exports.parseURL = function (input, options) {
    const { baseURL, encodingOverride } = options === undefined ? {} : options;

    // We don't handle blobs, so this just delegates:
    return module.exports.basicURLParse(input, { baseURL, encodingOverride });
  };