brace-expressions.js 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  // Translate the various POSIX character classes into Unicode properties.
  // This works across all Unicode locales.
  // Shape: { <posix class>: [<translation>, /u flag required, negated] }
  // An entry whose third element is true describes the class by what it
  // excludes: the translation is emitted inside a negated set ([^...]).
  const posixClasses = {
    '[:alnum:]': ['\\p{L}\\p{Nl}\\p{Nd}', true],
    '[:alpha:]': ['\\p{L}\\p{Nl}', true],
    '[:ascii:]': ['\\x' + '00-\\x' + '7f', false],
    '[:blank:]': ['\\p{Zs}\\t', true],
    '[:cntrl:]': ['\\p{Cc}', true],
    '[:digit:]': ['\\p{Nd}', true],
    // graph = everything that is neither a separator nor control/other
    '[:graph:]': ['\\p{Z}\\p{C}', true, true],
    '[:lower:]': ['\\p{Ll}', true],
    // print = graph plus spaces, i.e. everything that is NOT control/other.
    // It must be negated; un-negated it would match only control/other
    // characters, the opposite of "printable".
    '[:print:]': ['\\p{C}', true, true],
    '[:punct:]': ['\\p{P}', true],
    '[:space:]': ['\\p{Z}\\t\\r\\n\\v\\f', true],
    '[:upper:]': ['\\p{Lu}', true],
    '[:word:]': ['\\p{L}\\p{Nl}\\p{Nd}\\p{Pc}', true],
    '[:xdigit:]': ['A-Fa-f0-9', false],
  };
  // Inside a bracket expression only four characters carry meaning:
  //   [  \  ]  -
  // Prefix each of them with a backslash and leave everything else alone.
  const braceEscape = (s) => {
    const classMagic = /[[\]\\-]/g;
    return s.replace(classMagic, '\\$&');
  };
  // Backslash-escape every character that is magic in a regular expression
  // (this set also covers ',', '#' and whitespace).
  const regexpEscape = (s) => {
    const regexpMagic = /[-[\]{}()*+?.,\\^$|#\s]/g;
    return s.replace(regexpMagic, '\\$&');
  };
  // Each entry is already escaped for use inside a character class, so
  // building the class body is plain concatenation with no separator.
  const rangesToString = (ranges) => {
    const noSeparator = '';
    return ranges.join(noSeparator);
  };
  // Parse the POSIX bracket expression that starts at glob[position] and
  // translate it into regular expression source.
  //
  // Returns a 4-element array:
  //   [0] the regexp source equivalent to the class
  //   [1] whether the /u flag must be applied to that source
  //   [2] the number of characters of `glob` consumed by the class
  //   [3] false when the result is a plain literal (or nothing was parsed),
  //       true when it is a real pattern
  //
  // Special results:
  //   ['', false, 0, false]  the class is never closed; the caller may
  //                          still treat the text as a literal match
  //   ['$.', false, <rest of glob>, true]
  //                          the class cannot match anything, which
  //                          poisons the whole glob
  //
  // Out-of-order ranges such as z-a are dropped; other members of the same
  // class can still match. Throws if glob[position] is not '['.
  export const parseClass = (glob, position) => {
    const start = position;
    /* c8 ignore start */
    if (glob.charAt(start) !== '[') {
      throw new Error('not in a brace expression');
    }
    /* c8 ignore stop */
    const positives = [];
    const negatives = [];
    let cursor = start + 1;
    let sawMember = false;
    let needsUnicode = false;
    let escaped = false;
    let inverted = false;
    let closeIndex = start;
    let pendingLow = '';
    while (cursor < glob.length) {
      const ch = glob.charAt(cursor);
      // a leading ! or ^ inverts the whole class
      if (cursor === start + 1 && (ch === '!' || ch === '^')) {
        inverted = true;
        cursor += 1;
        continue;
      }
      // an unescaped ] closes the class, unless it is the very first member
      if (!escaped && sawMember && ch === ']') {
        closeIndex = cursor + 1;
        break;
      }
      sawMember = true;
      // a lone backslash escapes the next character; an already-escaped
      // backslash falls through and is handled as an ordinary character
      if (ch === '\\' && !escaped) {
        escaped = true;
        cursor += 1;
        continue;
      }
      if (ch === '[' && !escaped) {
        // either a posix class, a collation equivalent, or just a [
        const named = Object.entries(posixClasses).find(([name]) => glob.startsWith(name, cursor));
        if (named !== undefined) {
          // [a-[] is fine, but a range ending in a named class is not
          if (pendingLow) {
            return ['$.', false, glob.length - start, true];
          }
          const [name, [unip, u, neg]] = named;
          cursor += name.length;
          if (neg) {
            negatives.push(unip);
          }
          else {
            positives.push(unip);
          }
          needsUnicode = needsUnicode || u;
          continue;
        }
      }
      // from here on ch is effectively an ordinary character
      escaped = false;
      if (pendingLow) {
        // ch is the upper bound of a range; keep the range only if it is
        // in order, collapsing x-x down to the single character x
        if (ch > pendingLow) {
          positives.push(braceEscape(pendingLow) + '-' + braceEscape(ch));
        }
        else if (ch === pendingLow) {
          positives.push(braceEscape(ch));
        }
        pendingLow = '';
        cursor += 1;
      }
      else if (glob.startsWith('-]', cursor + 1)) {
        // c-] : the dash is a literal member, not a range operator
        positives.push(braceEscape(ch + '-'));
        cursor += 2;
      }
      else if (glob.startsWith('-', cursor + 1)) {
        // c-d : remember the lower bound and wait for the upper one
        pendingLow = ch;
        cursor += 2;
      }
      else {
        // just a single character
        positives.push(braceEscape(ch));
        cursor += 1;
      }
    }
    if (closeIndex < cursor) {
      // ran off the end without seeing the closing ]: not a valid class,
      // but might still be valid as a literal match.
      return ['', false, 0, false];
    }
    // nothing survived parsing, so the class can never match anything
    if (positives.length === 0 && negatives.length === 0) {
      return ['$.', false, glob.length - start, true];
    }
    const consumed = closeIndex - start;
    // A non-inverted class holding exactly one (possibly escaped) character
    // is not magic at all, it is just that literal character. [_] is a
    // perfectly valid way to escape glob magic chars.
    const isLoneLiteral = !inverted &&
      negatives.length === 0 &&
      positives.length === 1 &&
      /^\\?.$/.test(positives[0]);
    if (isLoneLiteral) {
      const [only] = positives;
      const literal = only.length === 2 ? only.slice(-1) : only;
      return [regexpEscape(literal), false, consumed, false];
    }
    const positiveSet = `[${inverted ? '^' : ''}${rangesToString(positives)}]`;
    const negativeSet = `[${inverted ? '' : '^'}${rangesToString(negatives)}]`;
    let combined;
    if (positives.length && negatives.length) {
      combined = `(${positiveSet}|${negativeSet})`;
    }
    else if (positives.length) {
      combined = positiveSet;
    }
    else {
      combined = negativeSet;
    }
    return [combined, needsUnicode, consumed, true];
  };
  148. //# sourceMappingURL=brace-expressions.js.map