// brace-expressions.js (5.6 KB)
  1. "use strict";
// Translate the various POSIX character classes into Unicode property
// escapes, so that they work across all Unicode locales.
//
// CommonJS interop boilerplate: mark the module as a transpiled ES module
// and pre-declare the `parseClass` export, which receives its real value
// at the bottom of the file.
Object.defineProperty(exports, "__esModule", { value: true });
exports.parseClass = void 0;
  6. // { <posix class>: [<translation>, /u flag required, negated]
  7. const posixClasses = {
  8. '[:alnum:]': ['\\p{L}\\p{Nl}\\p{Nd}', true],
  9. '[:alpha:]': ['\\p{L}\\p{Nl}', true],
  10. '[:ascii:]': ['\\x' + '00-\\x' + '7f', false],
  11. '[:blank:]': ['\\p{Zs}\\t', true],
  12. '[:cntrl:]': ['\\p{Cc}', true],
  13. '[:digit:]': ['\\p{Nd}', true],
  14. '[:graph:]': ['\\p{Z}\\p{C}', true, true],
  15. '[:lower:]': ['\\p{Ll}', true],
  16. '[:print:]': ['\\p{C}', true],
  17. '[:punct:]': ['\\p{P}', true],
  18. '[:space:]': ['\\p{Z}\\t\\r\\n\\v\\f', true],
  19. '[:upper:]': ['\\p{Lu}', true],
  20. '[:word:]': ['\\p{L}\\p{Nl}\\p{Nd}\\p{Pc}', true],
  21. '[:xdigit:]': ['A-Fa-f0-9', false],
  22. };
  23. // only need to escape a few things inside of brace expressions
  24. // escapes: [ \ ] -
  25. const braceEscape = (s) => s.replace(/[[\]\\-]/g, '\\$&');
  26. // escape all regexp magic characters
  27. const regexpEscape = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
  28. // everything has already been escaped, we just have to join
  29. const rangesToString = (ranges) => ranges.join('');
  30. // takes a glob string at a posix brace expression, and returns
  31. // an equivalent regular expression source, and boolean indicating
  32. // whether the /u flag needs to be applied, and the number of chars
  33. // consumed to parse the character class.
  34. // This also removes out of order ranges, and returns ($.) if the
  35. // entire class just no good.
  36. const parseClass = (glob, position) => {
  37. const pos = position;
  38. /* c8 ignore start */
  39. if (glob.charAt(pos) !== '[') {
  40. throw new Error('not in a brace expression');
  41. }
  42. /* c8 ignore stop */
  43. const ranges = [];
  44. const negs = [];
  45. let i = pos + 1;
  46. let sawStart = false;
  47. let uflag = false;
  48. let escaping = false;
  49. let negate = false;
  50. let endPos = pos;
  51. let rangeStart = '';
  52. WHILE: while (i < glob.length) {
  53. const c = glob.charAt(i);
  54. if ((c === '!' || c === '^') && i === pos + 1) {
  55. negate = true;
  56. i++;
  57. continue;
  58. }
  59. if (c === ']' && sawStart && !escaping) {
  60. endPos = i + 1;
  61. break;
  62. }
  63. sawStart = true;
  64. if (c === '\\') {
  65. if (!escaping) {
  66. escaping = true;
  67. i++;
  68. continue;
  69. }
  70. // escaped \ char, fall through and treat like normal char
  71. }
  72. if (c === '[' && !escaping) {
  73. // either a posix class, a collation equivalent, or just a [
  74. for (const [cls, [unip, u, neg]] of Object.entries(posixClasses)) {
  75. if (glob.startsWith(cls, i)) {
  76. // invalid, [a-[] is fine, but not [a-[:alpha]]
  77. if (rangeStart) {
  78. return ['$.', false, glob.length - pos, true];
  79. }
  80. i += cls.length;
  81. if (neg)
  82. negs.push(unip);
  83. else
  84. ranges.push(unip);
  85. uflag = uflag || u;
  86. continue WHILE;
  87. }
  88. }
  89. }
  90. // now it's just a normal character, effectively
  91. escaping = false;
  92. if (rangeStart) {
  93. // throw this range away if it's not valid, but others
  94. // can still match.
  95. if (c > rangeStart) {
  96. ranges.push(braceEscape(rangeStart) + '-' + braceEscape(c));
  97. }
  98. else if (c === rangeStart) {
  99. ranges.push(braceEscape(c));
  100. }
  101. rangeStart = '';
  102. i++;
  103. continue;
  104. }
  105. // now might be the start of a range.
  106. // can be either c-d or c-] or c<more...>] or c] at this point
  107. if (glob.startsWith('-]', i + 1)) {
  108. ranges.push(braceEscape(c + '-'));
  109. i += 2;
  110. continue;
  111. }
  112. if (glob.startsWith('-', i + 1)) {
  113. rangeStart = c;
  114. i += 2;
  115. continue;
  116. }
  117. // not the start of a range, just a single character
  118. ranges.push(braceEscape(c));
  119. i++;
  120. }
  121. if (endPos < i) {
  122. // didn't see the end of the class, not a valid class,
  123. // but might still be valid as a literal match.
  124. return ['', false, 0, false];
  125. }
  126. // if we got no ranges and no negates, then we have a range that
  127. // cannot possibly match anything, and that poisons the whole glob
  128. if (!ranges.length && !negs.length) {
  129. return ['$.', false, glob.length - pos, true];
  130. }
  131. // if we got one positive range, and it's a single character, then that's
  132. // not actually a magic pattern, it's just that one literal character.
  133. // we should not treat that as "magic", we should just return the literal
  134. // character. [_] is a perfectly valid way to escape glob magic chars.
  135. if (negs.length === 0 &&
  136. ranges.length === 1 &&
  137. /^\\?.$/.test(ranges[0]) &&
  138. !negate) {
  139. const r = ranges[0].length === 2 ? ranges[0].slice(-1) : ranges[0];
  140. return [regexpEscape(r), false, endPos - pos, false];
  141. }
  142. const sranges = '[' + (negate ? '^' : '') + rangesToString(ranges) + ']';
  143. const snegs = '[' + (negate ? '' : '^') + rangesToString(negs) + ']';
  144. const comb = ranges.length && negs.length
  145. ? '(' + sranges + '|' + snegs + ')'
  146. : ranges.length
  147. ? sranges
  148. : snegs;
  149. return [comb, uflag, endPos - pos, true];
  150. };
// Expose parseClass to consumers of this module.
exports.parseClass = parseClass;
//# sourceMappingURL=brace-expressions.js.map