ast.js 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592
  1. "use strict";
  2. // parse a single path portion
  3. Object.defineProperty(exports, "__esModule", { value: true });
  4. exports.AST = void 0;
  5. const brace_expressions_js_1 = require("./brace-expressions.js");
  6. const unescape_js_1 = require("./unescape.js");
  // The five characters that introduce an extglob when immediately
  // followed by "(".
  const types = new Set('!?+*@');
  // True when `ch` is exactly one of the extglob type characters.
  const isExtglobType = (ch) => {
    return types.has(ch);
  };
  // Lookahead fragments that get prepended to a pattern source to bind to
  // the start of either the whole string or a single path portion, so that
  // dots and/or traversal entries are rejected when needed.
  // Extglobs don't need the ^ or / bit, because the root binds that already.
  const startNoTraversal = '(?!(?:^|/)\\.\\.?(?:$|/))';
  const startNoDot = '(?!\\.)';
  // Leading characters of a regexp source that mean a dot *might* be
  // matched, so the "no dots" prefix is needed. "(" is deliberately not in
  // the list: a child extglob handles the prevention itself.
  const addPatternStart = new Set('[.');
  // Portions where traversal is A-OK, so no dot prevention is applied.
  const justDots = new Set(['..', '.']);
  // Characters that keep their backslash when they appear escaped in a glob.
  const reSpecials = new Set([...'().*{}+?[]^$\\!']);
  // Backslash-escape every character of `s` that is special in a regular
  // expression (plus "-", ",", "#" and whitespace).
  const regExpEscape = (s) => {
    const special = /[-[\]{}()*+?.,\\^$|#\s]/g;
    return s.replace(special, '\\$&');
  };
  // ? => any single character other than /
  const qmark = '[^/]';
  // * => any number of non-slash characters (lazy)
  const star = `${qmark}*?`;
  // Variant of * that requires at least one character. Used when the * is
  // the only thing in the path portion, so *something* has to match.
  const starNoEmpty = `${qmark}+?`;
  30. // remove the \ chars that we added if we end up doing a nonmagic compare
  31. // const deslash = (s: string) => s.replace(/\\(.)/g, '$1')
  32. class AST {
  33. type;
  34. #root;
  35. #hasMagic;
  36. #uflag = false;
  37. #parts = [];
  38. #parent;
  39. #parentIndex;
  40. #negs;
  41. #filledNegs = false;
  42. #options;
  43. #toString;
  44. // set to true if it's an extglob with no children
  45. // (which really means one child of '')
  46. #emptyExt = false;
  47. constructor(type, parent, options = {}) {
  48. this.type = type;
  49. // extglobs are inherently magical
  50. if (type)
  51. this.#hasMagic = true;
  52. this.#parent = parent;
  53. this.#root = this.#parent ? this.#parent.#root : this;
  54. this.#options = this.#root === this ? options : this.#root.#options;
  55. this.#negs = this.#root === this ? [] : this.#root.#negs;
  56. if (type === '!' && !this.#root.#filledNegs)
  57. this.#negs.push(this);
  58. this.#parentIndex = this.#parent ? this.#parent.#parts.length : 0;
  59. }
  60. get hasMagic() {
  61. /* c8 ignore start */
  62. if (this.#hasMagic !== undefined)
  63. return this.#hasMagic;
  64. /* c8 ignore stop */
  65. for (const p of this.#parts) {
  66. if (typeof p === 'string')
  67. continue;
  68. if (p.type || p.hasMagic)
  69. return (this.#hasMagic = true);
  70. }
  71. // note: will be undefined until we generate the regexp src and find out
  72. return this.#hasMagic;
  73. }
  74. // reconstructs the pattern
  75. toString() {
  76. if (this.#toString !== undefined)
  77. return this.#toString;
  78. if (!this.type) {
  79. return (this.#toString = this.#parts.map(p => String(p)).join(''));
  80. }
  81. else {
  82. return (this.#toString =
  83. this.type + '(' + this.#parts.map(p => String(p)).join('|') + ')');
  84. }
  85. }
  // Root-only, one-time pass over the negated ('!') extglobs recorded in
  // #negs. For each one, the parts that follow it in every non-extglob
  // ancestor are copied into each alternative of the negation.
  // Throws if called on a non-root node; returns `this`.
  #fillNegs() {
    /* c8 ignore start */
    if (this !== this.#root)
      throw new Error('should only call on root');
    if (this.#filledNegs)
      return this;
    /* c8 ignore stop */
    // call toString() once to fill this out
    // (toString() caches its result, so this happens before any parts are
    // copied below)
    this.toString();
    this.#filledNegs = true;
    let n;
    while ((n = this.#negs.pop())) {
      if (n.type !== '!')
        continue;
      // walk up the tree, appending everything that comes AFTER parentIndex
      let p = n;
      let pp = p.#parent;
      while (pp) {
        // the `!pp.type` condition means extglob ancestors contribute
        // nothing; only plain ancestors have their parts copied
        for (let i = p.#parentIndex + 1; !pp.type && i < pp.#parts.length; i++) {
          for (const part of n.#parts) {
            /* c8 ignore start */
            if (typeof part === 'string') {
              throw new Error('string part in extglob AST??');
            }
            /* c8 ignore stop */
            part.copyIn(pp.#parts[i]);
          }
        }
        p = pp;
        pp = p.#parent;
      }
    }
    return this;
  }
  120. push(...parts) {
  121. for (const p of parts) {
  122. if (p === '')
  123. continue;
  124. /* c8 ignore start */
  125. if (typeof p !== 'string' && !(p instanceof AST && p.#parent === this)) {
  126. throw new Error('invalid part: ' + p);
  127. }
  128. /* c8 ignore stop */
  129. this.#parts.push(p);
  130. }
  131. }
  132. toJSON() {
  133. const ret = this.type === null
  134. ? this.#parts.slice().map(p => (typeof p === 'string' ? p : p.toJSON()))
  135. : [this.type, ...this.#parts.map(p => p.toJSON())];
  136. if (this.isStart() && !this.type)
  137. ret.unshift([]);
  138. if (this.isEnd() &&
  139. (this === this.#root ||
  140. (this.#root.#filledNegs && this.#parent?.type === '!'))) {
  141. ret.push({});
  142. }
  143. return ret;
  144. }
  145. isStart() {
  146. if (this.#root === this)
  147. return true;
  148. // if (this.type) return !!this.#parent?.isStart()
  149. if (!this.#parent?.isStart())
  150. return false;
  151. if (this.#parentIndex === 0)
  152. return true;
  153. // if everything AHEAD of this is a negation, then it's still the "start"
  154. const p = this.#parent;
  155. for (let i = 0; i < this.#parentIndex; i++) {
  156. const pp = p.#parts[i];
  157. if (!(pp instanceof AST && pp.type === '!')) {
  158. return false;
  159. }
  160. }
  161. return true;
  162. }
  163. isEnd() {
  164. if (this.#root === this)
  165. return true;
  166. if (this.#parent?.type === '!')
  167. return true;
  168. if (!this.#parent?.isEnd())
  169. return false;
  170. if (!this.type)
  171. return this.#parent?.isEnd();
  172. // if not root, it'll always have a parent
  173. /* c8 ignore start */
  174. const pl = this.#parent ? this.#parent.#parts.length : 0;
  175. /* c8 ignore stop */
  176. return this.#parentIndex === pl - 1;
  177. }
  178. copyIn(part) {
  179. if (typeof part === 'string')
  180. this.push(part);
  181. else
  182. this.push(part.clone(this));
  183. }
  184. clone(parent) {
  185. const c = new AST(this.type, parent);
  186. for (const p of this.#parts) {
  187. c.copyIn(p);
  188. }
  189. return c;
  190. }
  // Recursive parser that fills `ast` from the pattern text.
  //   str - the full pattern string
  //   ast - the node to populate; a node with type === null is parsed as
  //         plain text, anything else as an extglob body
  //   pos - index to start at; for an extglob this is the index of its "("
  //   opt - options; only `noext` is read here
  // Returns the index in `str` where parsing stopped.
  static #parseAST(str, ast, pos, opt) {
    let escaping = false;
    // bracket-expression tracking: while inBrace is set, extglob starts,
    // "|" and ")" are accumulated as plain text
    let inBrace = false;
    // index of the character just after the opening "["
    let braceStart = -1;
    // set when the bracket expression opens with "^" or "!"
    let braceNeg = false;
    if (ast.type === null) {
      // outside of a extglob, append until we find a start
      let i = pos;
      let acc = '';
      while (i < str.length) {
        const c = str.charAt(i++);
        // still accumulate escapes at this point, but we do ignore
        // starts that are escaped
        if (escaping || c === '\\') {
          escaping = !escaping;
          acc += c;
          continue;
        }
        if (inBrace) {
          if (i === braceStart + 1) {
            // first character inside the brackets: may mark negation, and
            // is never treated as the closing "]"
            if (c === '^' || c === '!') {
              braceNeg = true;
            }
          }
          // a "]" directly after the negation marker does not close either
          else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
            inBrace = false;
          }
          acc += c;
          continue;
        }
        else if (c === '[') {
          inBrace = true;
          braceStart = i;
          braceNeg = false;
          acc += c;
          continue;
        }
        if (!opt.noext && isExtglobType(c) && str.charAt(i) === '(') {
          // flush the text so far, then parse the extglob as a child node
          ast.push(acc);
          acc = '';
          const ext = new AST(c, ast);
          i = AST.#parseAST(str, ext, i, opt);
          ast.push(ext);
          continue;
        }
        acc += c;
      }
      ast.push(acc);
      return i;
    }
    // some kind of extglob, pos is at the (
    // find the next | or )
    let i = pos + 1;
    // the alternative currently being built, and the completed ones
    let part = new AST(null, ast);
    const parts = [];
    let acc = '';
    while (i < str.length) {
      const c = str.charAt(i++);
      // still accumulate escapes at this point, but we do ignore
      // starts that are escaped
      if (escaping || c === '\\') {
        escaping = !escaping;
        acc += c;
        continue;
      }
      if (inBrace) {
        if (i === braceStart + 1) {
          if (c === '^' || c === '!') {
            braceNeg = true;
          }
        }
        else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
          inBrace = false;
        }
        acc += c;
        continue;
      }
      else if (c === '[') {
        inBrace = true;
        braceStart = i;
        braceNeg = false;
        acc += c;
        continue;
      }
      // nested extglob: becomes a child of the current alternative
      if (isExtglobType(c) && str.charAt(i) === '(') {
        part.push(acc);
        acc = '';
        const ext = new AST(c, part);
        part.push(ext);
        i = AST.#parseAST(str, ext, i, opt);
        continue;
      }
      // "|" finishes the current alternative and starts the next one
      if (c === '|') {
        part.push(acc);
        acc = '';
        parts.push(part);
        part = new AST(null, ast);
        continue;
      }
      // ")" finishes the extglob: attach all alternatives and report the
      // index just past the ")"
      if (c === ')') {
        if (acc === '' && ast.#parts.length === 0) {
          ast.#emptyExt = true;
        }
        part.push(acc);
        acc = '';
        ast.push(...parts, part);
        return i;
      }
      acc += c;
    }
    // unfinished extglob
    // if we got here, it was a malformed extglob! not an extglob, but
    // maybe something else in there.
    // The node is turned into a plain node holding the rest of the string,
    // starting at the extglob type character (pos - 1).
    ast.type = null;
    ast.#hasMagic = undefined;
    ast.#parts = [str.substring(pos - 1)];
    return i;
  }
  309. static fromGlob(pattern, options = {}) {
  310. const ast = new AST(null, undefined, options);
  311. AST.#parseAST(pattern, ast, 0, options);
  312. return ast;
  313. }
  314. // returns the regular expression if there's magic, or the unescaped
  315. // string if not.
  316. toMMPattern() {
  317. // should only be called on root
  318. /* c8 ignore start */
  319. if (this !== this.#root)
  320. return this.#root.toMMPattern();
  321. /* c8 ignore stop */
  322. const glob = this.toString();
  323. const [re, body, hasMagic, uflag] = this.toRegExpSource();
  324. // if we're in nocase mode, and not nocaseMagicOnly, then we do
  325. // still need a regular expression if we have to case-insensitively
  326. // match capital/lowercase characters.
  327. const anyMagic = hasMagic ||
  328. this.#hasMagic ||
  329. (this.#options.nocase &&
  330. !this.#options.nocaseMagicOnly &&
  331. glob.toUpperCase() !== glob.toLowerCase());
  332. if (!anyMagic) {
  333. return body;
  334. }
  335. const flags = (this.#options.nocase ? 'i' : '') + (uflag ? 'u' : '');
  336. return Object.assign(new RegExp(`^${re}$`, flags), {
  337. _src: re,
  338. _glob: glob,
  339. });
  340. }
  // The options object stored on this node. The constructor takes it from
  // the root, so every node of one tree returns the same object.
  get options() {
    return this.#options;
  }
  344. // returns the string match, the regexp source, whether there's magic
  345. // in the regexp (so a regular expression is required) and whether or
  346. // not the uflag is needed for the regular expression (for posix classes)
  347. // TODO: instead of injecting the start/end at this point, just return
  348. // the BODY of the regexp, along with the start/end portions suitable
  349. // for binding the start/end in either a joined full-path makeRe context
  350. // (where we bind to (^|/), or a standalone matchPart context (where
  351. // we bind to ^, and not /). Otherwise slashes get duped!
  352. //
  353. // In part-matching mode, the start is:
  354. // - if not isStart: nothing
  355. // - if traversal possible, but not allowed: ^(?!\.\.?$)
  356. // - if dots allowed or not possible: ^
  357. // - if dots possible and not allowed: ^(?!\.)
  358. // end is:
  359. // - if not isEnd(): nothing
  360. // - else: $
  361. //
  362. // In full-path matching mode, we put the slash at the START of the
  363. // pattern, so start is:
  364. // - if first pattern: same as part-matching mode
  365. // - if not isStart(): nothing
  366. // - if traversal possible, but not allowed: /(?!\.\.?(?:$|/))
  367. // - if dots allowed or not possible: /
  368. // - if dots possible and not allowed: /(?!\.)
  369. // end is:
  370. // - if last pattern, same as part-matching mode
  371. // - else nothing
  372. //
  373. // Always put the (?:$|/) on negated tails, though, because that has to be
  374. // there to bind the end of the negated pattern portion, and it's easier to
  375. // just stick it in now rather than try to inject it later in the middle of
  376. // the pattern.
  377. //
  378. // We can just always return the same end, and leave it up to the caller
  379. // to know whether it's going to be used joined or in parts.
  380. // And, if the start is adjusted slightly, can do the same there:
  381. // - if not isStart: nothing
  382. // - if traversal possible, but not allowed: (?:/|^)(?!\.\.?$)
  383. // - if dots allowed or not possible: (?:/|^)
  384. // - if dots possible and not allowed: (?:/|^)(?!\.)
  385. //
  386. // But it's better to have a simpler binding without a conditional, for
  387. // performance, so probably better to return both start options.
  388. //
  389. // Then the caller just ignores the end if it's not the first pattern,
  390. // and the start always gets applied.
  391. //
  392. // But that's always going to be $ if it's the ending pattern, or nothing,
  393. // so the caller can just attach $ at the end of the pattern when building.
  394. //
  395. // So the todo is:
  396. // - better detect what kind of start is needed
  397. // - return both flavors of starting pattern
  398. // - attach $ at the end of the pattern when creating the actual RegExp
  399. //
  400. // Ah, but wait, no, that all only applies to the root when the first pattern
  401. // is not an extglob. If the first pattern IS an extglob, then we need all
  402. // that dot prevention biz to live in the extglob portions, because eg
  403. // +(*|.x*) can match .xy but not .yx.
  404. //
  405. // So, return the two flavors if it's #root and the first child is not an
  406. // AST, otherwise leave it to the child AST to handle it, and there,
  407. // use the (?:^|/) style of start binding.
  408. //
  409. // Even simplified further:
  410. // - Since the start for a join is eg /(?!\.) and the start for a part
  411. // is ^(?!\.), we can just prepend (?!\.) to the pattern (either root
  412. // or start or whatever) and prepend ^ or / at the Regexp construction.
  // Builds the regular-expression source for this node.
  //   allowDot - optional override for dot handling; when it is null or
  //              undefined the `dot` option of the tree is used instead.
  // Returns a 4-tuple:
  //   [regexp source, unescaped body, hasMagic, uflag]
  // Side effects: on the root it runs #fillNegs() first; #hasMagic and
  // #uflag are updated from the parts; an extglob that is the entire path
  // portion but has an empty body is converted in place into a plain node
  // holding its own text.
  toRegExpSource(allowDot) {
    const dot = allowDot ?? !!this.#options.dot;
    if (this.#root === this)
      this.#fillNegs();
    if (!this.type) {
      // plain node: string parts go through #parseGlob, child ASTs recurse
      const noEmpty = this.isStart() && this.isEnd();
      const src = this.#parts
        .map(p => {
          const [re, _, hasMagic, uflag] = typeof p === 'string'
            ? AST.#parseGlob(p, this.#hasMagic, noEmpty)
            : p.toRegExpSource(allowDot);
          this.#hasMagic = this.#hasMagic || hasMagic;
          this.#uflag = this.#uflag || uflag;
          return re;
        })
        .join('');
      let start = '';
      if (this.isStart()) {
        if (typeof this.#parts[0] === 'string') {
          // this is the string that will match the start of the pattern,
          // so we need to protect against dots and such.
          // '.' and '..' cannot match unless the pattern is that exactly,
          // even if it starts with . or dot:true is set.
          const dotTravAllowed = this.#parts.length === 1 && justDots.has(this.#parts[0]);
          if (!dotTravAllowed) {
            const aps = addPatternStart;
            // check if we have a possibility of matching . or ..,
            // and prevent that.
            const needNoTrav =
              // dots are allowed, and the pattern starts with [ or .
              (dot && aps.has(src.charAt(0))) ||
              // the pattern starts with \., and then [ or .
              (src.startsWith('\\.') && aps.has(src.charAt(2))) ||
              // the pattern starts with \.\., and then [ or .
              (src.startsWith('\\.\\.') && aps.has(src.charAt(4)));
            // no need to prevent dots if it can't match a dot, or if a
            // sub-pattern will be preventing it anyway.
            const needNoDot = !dot && !allowDot && aps.has(src.charAt(0));
            start = needNoTrav ? startNoTraversal : needNoDot ? startNoDot : '';
          }
        }
      }
      // append the "end of path portion" pattern to negation tails
      let end = '';
      if (this.isEnd() &&
        this.#root.#filledNegs &&
        this.#parent?.type === '!') {
        end = '(?:$|\\/)';
      }
      const final = start + src + end;
      return [
        final,
        (0, unescape_js_1.unescape)(src),
        (this.#hasMagic = !!this.#hasMagic),
        this.#uflag,
      ];
    }
    // We need to calculate the body *twice* if it's a repeat pattern
    // at the start, once in nodot mode, then again in dot mode, so a
    // pattern like *(?) can match 'x.y'
    const repeated = this.type === '*' || this.type === '+';
    // some kind of extglob
    // a negation opens a non-capturing group plus a negative lookahead;
    // every other type opens a plain non-capturing group
    const start = this.type === '!' ? '(?:(?!(?:' : '(?:';
    let body = this.#partsToRegExp(dot);
    if (this.isStart() && this.isEnd() && !body && this.type !== '!') {
      // invalid extglob, has to at least be *something* present, if it's
      // the entire path portion.
      const s = this.toString();
      this.#parts = [s];
      this.type = null;
      this.#hasMagic = undefined;
      return [s, (0, unescape_js_1.unescape)(this.toString()), false, false];
    }
    // XXX abstract out this map method
    // NOTE(review): startNoDot is a non-empty string constant in this file,
    // so the `!startNoDot` operand below is always false and never changes
    // the outcome.
    let bodyDotAllowed = !repeated || allowDot || dot || !startNoDot
      ? ''
      : this.#partsToRegExp(true);
    if (bodyDotAllowed === body) {
      bodyDotAllowed = '';
    }
    if (bodyDotAllowed) {
      // first repetition uses the no-dot body, later ones the dot-allowed
      // body
      body = `(?:${body})(?:${bodyDotAllowed})*?`;
    }
    // an empty !() is exactly equivalent to a starNoEmpty
    let final = '';
    if (this.type === '!' && this.#emptyExt) {
      final = (this.isStart() && !dot ? startNoDot : '') + starNoEmpty;
    }
    else {
      // closing text for the group, chosen by extglob type
      const close = this.type === '!'
        ? // !() must match something,but !(x) can match ''
          '))' +
            (this.isStart() && !dot && !allowDot ? startNoDot : '') +
            star +
            ')'
        : this.type === '@'
          ? ')'
          : this.type === '?'
            ? ')?'
            : this.type === '+' && bodyDotAllowed
              ? ')'
              : this.type === '*' && bodyDotAllowed
                ? `)?`
                : `)${this.type}`;
      final = start + body + close;
    }
    return [
      final,
      (0, unescape_js_1.unescape)(body),
      (this.#hasMagic = !!this.#hasMagic),
      this.#uflag,
    ];
  }
  526. #partsToRegExp(dot) {
  527. return this.#parts
  528. .map(p => {
  529. // extglob ASTs should only contain parent ASTs
  530. /* c8 ignore start */
  531. if (typeof p === 'string') {
  532. throw new Error('string type in extglob ast??');
  533. }
  534. /* c8 ignore stop */
  535. // can ignore hasMagic, because extglobs are already always magic
  536. const [re, _, _hasMagic, uflag] = p.toRegExpSource(dot);
  537. this.#uflag = this.#uflag || uflag;
  538. return re;
  539. })
  540. .filter(p => !(this.isStart() && this.isEnd()) || !!p)
  541. .join('|');
  542. }
  543. static #parseGlob(glob, hasMagic, noEmpty = false) {
  544. let escaping = false;
  545. let re = '';
  546. let uflag = false;
  547. for (let i = 0; i < glob.length; i++) {
  548. const c = glob.charAt(i);
  549. if (escaping) {
  550. escaping = false;
  551. re += (reSpecials.has(c) ? '\\' : '') + c;
  552. continue;
  553. }
  554. if (c === '\\') {
  555. if (i === glob.length - 1) {
  556. re += '\\\\';
  557. }
  558. else {
  559. escaping = true;
  560. }
  561. continue;
  562. }
  563. if (c === '[') {
  564. const [src, needUflag, consumed, magic] = (0, brace_expressions_js_1.parseClass)(glob, i);
  565. if (consumed) {
  566. re += src;
  567. uflag = uflag || needUflag;
  568. i += consumed - 1;
  569. hasMagic = hasMagic || magic;
  570. continue;
  571. }
  572. }
  573. if (c === '*') {
  574. if (noEmpty && glob === '*')
  575. re += starNoEmpty;
  576. else
  577. re += star;
  578. hasMagic = true;
  579. continue;
  580. }
  581. if (c === '?') {
  582. re += qmark;
  583. hasMagic = true;
  584. continue;
  585. }
  586. re += regExpEscape(c);
  587. }
  588. return [re, (0, unescape_js_1.unescape)(glob), !!hasMagic, uflag];
  589. }
  590. }
  591. exports.AST = AST;
  592. //# sourceMappingURL=ast.js.map