123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588 |
- // parse a single path portion
- import { parseClass } from './brace-expressions.js';
- import { unescape } from './unescape.js';
// The characters that open an extglob when immediately followed by `(`.
const types = new Set('!?+*@');
/** True when `ch` is one of the extglob type characters: ! ? + * @ */
const isExtglobType = (ch) => types.has(ch);
// Lookahead prefixes that get prepended to a pattern in order to bind to
// the start of either the whole string or one path portion, and reject
// leading dots and/or `.` / `..` traversal segments when needed.
// Extglobs do not need the `^` or `/` piece, because the root already
// binds that.
const startNoTraversal = String.raw`(?!(?:^|/)\.\.?(?:$|/))`;
const startNoDot = String.raw`(?!\.)`;
// First characters of a generated pattern that mean the "no dots" prefix
// is required, because a dot *might* be matched at that position.
// `(` is deliberately absent: when the pattern starts with a child
// extglob, that child takes care of the dot prevention itself.
const addPatternStart = new Set('[.');
// Patterns for which traversal is fine and no dot prevention is needed.
const justDots = new Set(['..', '.']);
// Characters that keep their backslash when a glob escape sequence is
// copied into the regular-expression source.
const reSpecials = new Set([...'().*{}+?[]^$\\!']);
/** Backslash-escape every regular-expression metacharacter in `s`. */
const regExpEscape = (s) =>
  s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, (hit) => `\\${hit}`);
// `?` => exactly one character that is not a path separator
const qmark = '[^/]';
// `*` => any number of such characters (lazy)
const star = `${qmark}*?`;
// Same as `star`, but requires at least one character. Used when the `*`
// is the only thing in the path portion, so that *something* must match.
const starNoEmpty = `${qmark}+?`;
- // remove the \ chars that we added if we end up doing a nonmagic compare
- // const deslash = (s: string) => s.replace(/\\(.)/g, '$1')
/**
 * Parse tree for one path portion of a glob pattern.
 *
 * A node whose `type` is `null` is a plain sequence: its parts are glob
 * strings and/or nested extglob nodes. A node whose `type` is one of
 * `! ? + * @` is an extglob: its parts are the `|`-separated alternatives,
 * each of which is itself a `null`-typed node.
 */
export class AST {
  /** extglob type character, or `null` for a plain sequence */
  type;
  #root;
  // undefined until known; extglob nodes are magic from construction
  #hasMagic;
  #uflag = false;
  #parts = [];
  #parent;
  // number of parts the parent already had when this node was created
  #parentIndex;
  // list of `!` nodes, shared by every node of one tree (owned by the root)
  #negs;
  #filledNegs = false;
  #options;
  // cached result of toString()
  #toString;
  // true only for an extglob written with a single, empty alternative,
  // e.g. `!()`
  #emptyExt = false;

  /**
   * @param type extglob type character, or `null` for a plain sequence
   * @param parent enclosing node, or `undefined` for the root
   * @param options only stored when this node is the root; every other
   *   node reads the root's options
   */
  constructor(type, parent, options = {}) {
    this.type = type;
    // extglobs are inherently magical
    if (type) this.#hasMagic = true;
    this.#parent = parent;
    this.#root = parent ? parent.#root : this;
    const isRoot = this.#root === this;
    this.#options = isRoot ? options : this.#root.#options;
    this.#negs = isRoot ? [] : this.#root.#negs;
    // negations are collected so #fillNegs() can process them later;
    // nodes created after that pass (clones) are not registered
    if (type === '!' && !this.#root.#filledNegs) this.#negs.push(this);
    this.#parentIndex = parent ? parent.#parts.length : 0;
  }

  /**
   * Whether this node is known to need a regular expression. May be
   * `undefined` until the regexp source has been generated.
   */
  get hasMagic() {
    /* c8 ignore start */
    if (this.#hasMagic !== undefined) return this.#hasMagic;
    /* c8 ignore stop */
    for (const p of this.#parts) {
      if (typeof p === 'string') continue;
      if (p.type || p.hasMagic) return (this.#hasMagic = true);
    }
    // note: will be undefined until we generate the regexp src and find out
    return this.#hasMagic;
  }

  /** Reconstructs the glob pattern text for this node (cached). */
  toString() {
    if (this.#toString !== undefined) return this.#toString;
    const rendered = this.#parts.map(p => String(p));
    this.#toString = this.type
      ? this.type + '(' + rendered.join('|') + ')'
      : rendered.join('');
    return this.#toString;
  }

  /**
   * Root-only, one-shot pass: for every `!(...)` node, copy everything
   * that comes after it (in each non-extglob ancestor) into each of its
   * alternatives.
   */
  #fillNegs() {
    /* c8 ignore start */
    if (this !== this.#root) throw new Error('should only call on root');
    if (this.#filledNegs) return this;
    /* c8 ignore stop */
    // call toString() once so the pattern text is cached before the tree
    // is modified below
    this.toString();
    this.#filledNegs = true;
    let neg;
    while ((neg = this.#negs.pop())) {
      // a malformed extglob gets demoted to a plain node after it was
      // registered; skip those
      if (neg.type !== '!') continue;
      // walk up the tree, appending everything that comes AFTER parentIndex
      let node = neg;
      let ancestor = node.#parent;
      while (ancestor) {
        if (!ancestor.type) {
          for (let i = node.#parentIndex + 1; i < ancestor.#parts.length; i++) {
            for (const alt of neg.#parts) {
              /* c8 ignore start */
              if (typeof alt === 'string') {
                throw new Error('string part in extglob AST??');
              }
              /* c8 ignore stop */
              alt.copyIn(ancestor.#parts[i]);
            }
          }
        }
        node = ancestor;
        ancestor = node.#parent;
      }
    }
    return this;
  }

  /**
   * Append parts to this node. Empty strings are dropped; any other part
   * must be a string or an AST whose parent is this node.
   */
  push(...parts) {
    for (const p of parts) {
      if (p === '') continue;
      /* c8 ignore start */
      if (typeof p !== 'string' && !(p instanceof AST && p.#parent === this)) {
        throw new Error('invalid part: ' + p);
      }
      /* c8 ignore stop */
      this.#parts.push(p);
    }
  }

  /**
   * Nested-array rendering of the tree. A plain node that isStart() gets a
   * leading `[]`; a node that isEnd() and is either the root or (once
   * negation tails are filled) an alternative of a `!` gets a trailing `{}`.
   */
  toJSON() {
    const ret =
      this.type === null
        ? this.#parts.map(p => (typeof p === 'string' ? p : p.toJSON()))
        : [this.type, ...this.#parts.map(p => p.toJSON())];
    if (this.isStart() && !this.type) ret.unshift([]);
    const marksEnd =
      this === this.#root ||
      (this.#root.#filledNegs && this.#parent?.type === '!');
    if (this.isEnd() && marksEnd) ret.push({});
    return ret;
  }

  /** True when this node can match at the start of the path portion. */
  isStart() {
    if (this.#root === this) return true;
    if (!this.#parent?.isStart()) return false;
    if (this.#parentIndex === 0) return true;
    // if everything AHEAD of this is a negation, then it's still the "start"
    const parent = this.#parent;
    for (let i = 0; i < this.#parentIndex; i++) {
      const sibling = parent.#parts[i];
      if (!(sibling instanceof AST && sibling.type === '!')) return false;
    }
    return true;
  }

  /** True when this node can match at the end of the path portion. */
  isEnd() {
    if (this.#root === this) return true;
    if (this.#parent?.type === '!') return true;
    if (!this.#parent?.isEnd()) return false;
    // the parent is at the end; a plain node simply inherits that
    if (!this.type) return true;
    // an extglob is at the end only when it is its parent's last part
    // if not root, it'll always have a parent
    /* c8 ignore start */
    const siblingCount = this.#parent ? this.#parent.#parts.length : 0;
    /* c8 ignore stop */
    return this.#parentIndex === siblingCount - 1;
  }

  /** Append `part` to this node; AST parts are deep-copied first. */
  copyIn(part) {
    this.push(typeof part === 'string' ? part : part.clone(this));
  }

  /** Deep copy of this node (type and parts) attached to `parent`. */
  clone(parent) {
    const copy = new AST(this.type, parent);
    for (const p of this.#parts) copy.copyIn(p);
    return copy;
  }

  /**
   * Shared scanner step for #parseAST. Updates the escape / bracket-class
   * tracking in `state` and returns true when `c` must be taken literally
   * (it is a backslash, is escaped, opens a `[` class, or is inside one).
   * `i` is the index just past `c`.
   */
  static #scanLiteral(state, c, i) {
    if (state.escaping || c === '\\') {
      state.escaping = !state.escaping;
      return true;
    }
    if (state.inBrace) {
      if (i === state.braceStart + 1) {
        // first character of the class: a negation marker, or a literal
        // (a `]` in this position does not close the class)
        if (c === '^' || c === '!') state.braceNeg = true;
      } else if (c === ']' && !(i === state.braceStart + 2 && state.braceNeg)) {
        // a `]` right after the negation marker is literal too
        state.inBrace = false;
      }
      return true;
    }
    if (c === '[') {
      state.inBrace = true;
      state.braceStart = i;
      state.braceNeg = false;
      return true;
    }
    return false;
  }

  /**
   * Parse `str` from `pos` into `ast` and return the index where parsing
   * stopped. For a plain node the rest of the string is consumed; for an
   * extglob node `pos` is at the `(` and parsing stops after its `)`.
   */
  static #parseAST(str, ast, pos, opt) {
    const scan = {
      escaping: false,
      inBrace: false,
      braceStart: -1,
      braceNeg: false,
    };
    if (ast.type === null) {
      // outside of a extglob, append until we find a start
      let i = pos;
      let acc = '';
      while (i < str.length) {
        const c = str.charAt(i++);
        // escapes and bracket classes are still accumulated verbatim, but
        // an extglob start inside them is ignored
        if (AST.#scanLiteral(scan, c, i)) {
          acc += c;
          continue;
        }
        if (!opt.noext && isExtglobType(c) && str.charAt(i) === '(') {
          ast.push(acc);
          acc = '';
          const ext = new AST(c, ast);
          i = AST.#parseAST(str, ext, i, opt);
          ast.push(ext);
          continue;
        }
        acc += c;
      }
      ast.push(acc);
      return i;
    }
    // some kind of extglob, pos is at the (
    // find the next | or )
    let i = pos + 1;
    let part = new AST(null, ast);
    const parts = [];
    let acc = '';
    while (i < str.length) {
      const c = str.charAt(i++);
      if (AST.#scanLiteral(scan, c, i)) {
        acc += c;
        continue;
      }
      if (isExtglobType(c) && str.charAt(i) === '(') {
        part.push(acc);
        acc = '';
        const ext = new AST(c, part);
        part.push(ext);
        i = AST.#parseAST(str, ext, i, opt);
        continue;
      }
      if (c === '|') {
        part.push(acc);
        acc = '';
        parts.push(part);
        part = new AST(null, ast);
        continue;
      }
      if (c === ')') {
        // Only a single, completely empty alternative counts as an empty
        // extglob. Checking just `acc` (the extglob node itself never has
        // parts at this point) also flagged `!(a|)` and `!(@(a))`, whose
        // bodies were then discarded when generating the regexp.
        if (acc === '' && parts.length === 0 && part.#parts.length === 0) {
          ast.#emptyExt = true;
        }
        part.push(acc);
        acc = '';
        ast.push(...parts, part);
        return i;
      }
      acc += c;
    }
    // unfinished extglob
    // if we got here, it was a malformed extglob! not an extglob, but
    // maybe something else in there.
    ast.type = null;
    ast.#hasMagic = undefined;
    ast.#parts = [str.substring(pos - 1)];
    return i;
  }

  /** Parse a glob path portion into a root AST. */
  static fromGlob(pattern, options = {}) {
    const ast = new AST(null, undefined, options);
    AST.#parseAST(pattern, ast, 0, options);
    return ast;
  }

  /**
   * Returns a RegExp (anchored with `^...$` and carrying `_src` and `_glob`
   * properties) if there's magic, or the unescaped string if not.
   */
  toMMPattern() {
    // should only be called on root
    /* c8 ignore start */
    if (this !== this.#root) return this.#root.toMMPattern();
    /* c8 ignore stop */
    const glob = this.toString();
    const [re, body, hasMagic, uflag] = this.toRegExpSource();
    // if we're in nocase mode, and not nocaseMagicOnly, then we do
    // still need a regular expression if we have to case-insensitively
    // match capital/lowercase characters.
    const anyMagic =
      hasMagic ||
      this.#hasMagic ||
      (this.#options.nocase &&
        !this.#options.nocaseMagicOnly &&
        glob.toUpperCase() !== glob.toLowerCase());
    if (!anyMagic) return body;
    const flags = (this.#options.nocase ? 'i' : '') + (uflag ? 'u' : '');
    return Object.assign(new RegExp(`^${re}$`, flags), {
      _src: re,
      _glob: glob,
    });
  }

  /** The options object stored on the root of this tree. */
  get options() {
    return this.#options;
  }

  /**
   * Build the regular-expression source for this node.
   *
   * Returns `[source, unescaped string, hasMagic, uflag]`: the regexp
   * source, the string to use for a non-magic comparison, whether a regular
   * expression is required, and whether it needs the `u` flag.
   *
   * `allowDot`, when not null/undefined, overrides `options.dot`.
   *
   * Design notes on start/end binding:
   * - The source is returned without `^` / `$`. The intent is that the
   *   caller prepends `^` (matching one part) or `/` (joined full-path
   *   regexp) and appends `$` when constructing the RegExp.
   * - Dot and traversal protection is emitted here as a lookahead prefix:
   *   nothing when dots are allowed or cannot match, `(?!\.)` when a dot
   *   could match but is not allowed, and the traversal guard when `.` or
   *   `..` could match but must not.
   * - That prefix is only added by a plain node whose first part is a
   *   string. When the first part is an extglob, the protection has to
   *   live inside the extglob's alternatives, because e.g. `+(*|.x*)` can
   *   match `.xy` but not `.yx`.
   * - Negation tails always get `(?:$|\/)` appended, because that has to
   *   be there to bind the end of the negated portion and it is easier to
   *   add now than to inject into the middle of the pattern later.
   * - TODO: better detect what kind of start is needed, and return both
   *   flavors of starting pattern.
   */
  toRegExpSource(allowDot) {
    const dot = allowDot ?? !!this.#options.dot;
    if (this.#root === this) this.#fillNegs();

    if (!this.type) {
      const noEmpty = this.isStart() && this.isEnd();
      const src = this.#parts
        .map(p => {
          const [re, _, hasMagic, uflag] =
            typeof p === 'string'
              ? AST.#parseGlob(p, this.#hasMagic, noEmpty)
              : p.toRegExpSource(allowDot);
          this.#hasMagic = this.#hasMagic || hasMagic;
          this.#uflag = this.#uflag || uflag;
          return re;
        })
        .join('');

      let start = '';
      if (this.isStart() && typeof this.#parts[0] === 'string') {
        // this is the string that will match the start of the pattern,
        // so we need to protect against dots and such.
        // '.' and '..' cannot match unless the pattern is that exactly,
        // even if it starts with . or dot:true is set.
        const dotTravAllowed =
          this.#parts.length === 1 && justDots.has(this.#parts[0]);
        if (!dotTravAllowed) {
          const aps = addPatternStart;
          // check if we have a possibility of matching . or ..,
          // and prevent that.
          const needNoTrav =
            // dots are allowed, and the pattern starts with [ or .
            (dot && aps.has(src.charAt(0))) ||
            // the pattern starts with \., and then [ or .
            (src.startsWith('\\.') && aps.has(src.charAt(2))) ||
            // the pattern starts with \.\., and then [ or .
            (src.startsWith('\\.\\.') && aps.has(src.charAt(4)));
          // no need to prevent dots if it can't match a dot, or if a
          // sub-pattern will be preventing it anyway.
          const needNoDot = !dot && !allowDot && aps.has(src.charAt(0));
          start = needNoTrav ? startNoTraversal : needNoDot ? startNoDot : '';
        }
      }

      // append the "end of path portion" pattern to negation tails
      let end = '';
      if (
        this.isEnd() &&
        this.#root.#filledNegs &&
        this.#parent?.type === '!'
      ) {
        end = '(?:$|\\/)';
      }
      return [
        start + src + end,
        unescape(src),
        (this.#hasMagic = !!this.#hasMagic),
        this.#uflag,
      ];
    }

    // some kind of extglob
    // We need to calculate the body *twice* if it's a repeat pattern
    // at the start, once in nodot mode, then again in dot mode, so a
    // pattern like *(?) can match 'x.y'
    const repeated = this.type === '*' || this.type === '+';
    const start = this.type === '!' ? '(?:(?!(?:' : '(?:';
    let body = this.#partsToRegExp(dot);
    if (this.isStart() && this.isEnd() && !body && this.type !== '!') {
      // invalid extglob, has to at least be *something* present, if it's
      // the entire path portion. Demote it to a plain literal node.
      const s = this.toString();
      this.#parts = [s];
      this.type = null;
      this.#hasMagic = undefined;
      return [s, unescape(s), false, false];
    }

    let bodyDotAllowed =
      !repeated || allowDot || dot ? '' : this.#partsToRegExp(true);
    if (bodyDotAllowed === body) bodyDotAllowed = '';
    if (bodyDotAllowed) {
      // first repetition in nodot mode, the following ones in dot mode
      body = `(?:${body})(?:${bodyDotAllowed})*?`;
    }

    let final = '';
    if (this.type === '!' && this.#emptyExt) {
      // an empty !() is exactly equivalent to a starNoEmpty
      final = (this.isStart() && !dot ? startNoDot : '') + starNoEmpty;
    } else {
      let close;
      if (this.type === '!') {
        // !() must match something, but !(x) can match ''
        const noDot = this.isStart() && !dot && !allowDot ? startNoDot : '';
        close = '))' + noDot + star + ')';
      } else if (this.type === '@') {
        close = ')';
      } else if (this.type === '?') {
        close = ')?';
      } else if (this.type === '+' && bodyDotAllowed) {
        // the repetition is already expressed inside `body`
        close = ')';
      } else if (this.type === '*' && bodyDotAllowed) {
        close = ')?';
      } else {
        close = `)${this.type}`;
      }
      final = start + body + close;
    }
    return [
      final,
      unescape(body),
      (this.#hasMagic = !!this.#hasMagic),
      this.#uflag,
    ];
  }

  /**
   * Regexp source for the alternatives of an extglob node, joined with `|`.
   * When this node spans the entire path portion, alternatives that
   * produce an empty source are dropped.
   */
  #partsToRegExp(dot) {
    const sources = this.#parts.map(p => {
      // extglob ASTs should only contain parent ASTs
      /* c8 ignore start */
      if (typeof p === 'string') {
        throw new Error('string type in extglob ast??');
      }
      /* c8 ignore stop */
      // can ignore hasMagic, because extglobs are already always magic
      const [re, _, _hasMagic, uflag] = p.toRegExpSource(dot);
      this.#uflag = this.#uflag || uflag;
      return re;
    });
    const wholePortion = this.isStart() && this.isEnd();
    return sources.filter(re => !wholePortion || !!re).join('|');
  }

  /**
   * Translate one glob string (no extglobs) to regexp source.
   *
   * Returns `[source, unescaped glob, hasMagic, uflag]`. `hasMagic` is the
   * value known so far; it becomes true once a `*`, a `?`, or a bracket
   * class reported as magic by parseClass is seen. With `noEmpty`, a glob
   * that is exactly `*` must match at least one character.
   */
  static #parseGlob(glob, hasMagic, noEmpty = false) {
    let escaping = false;
    let re = '';
    let uflag = false;
    for (let i = 0; i < glob.length; i++) {
      const c = glob.charAt(i);
      if (escaping) {
        escaping = false;
        re += (reSpecials.has(c) ? '\\' : '') + c;
        continue;
      }
      if (c === '\\') {
        // a trailing backslash is a literal backslash
        if (i === glob.length - 1) re += '\\\\';
        else escaping = true;
        continue;
      }
      if (c === '[') {
        const [src, needUflag, consumed, magic] = parseClass(glob, i);
        // when nothing was consumed, the `[` falls through and is
        // escaped as a literal below
        if (consumed) {
          re += src;
          uflag = uflag || needUflag;
          i += consumed - 1;
          hasMagic = hasMagic || magic;
          continue;
        }
      }
      if (c === '*') {
        re += noEmpty && glob === '*' ? starNoEmpty : star;
        hasMagic = true;
        continue;
      }
      if (c === '?') {
        re += qmark;
        hasMagic = true;
        continue;
      }
      re += regExpEscape(c);
    }
    return [re, unescape(glob), !!hasMagic, uflag];
  }
}
- //# sourceMappingURL=ast.js.map
|