123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537 |
- "use strict";
- const {
- CALL,
- CONSTRUCT,
- ReferenceTracker,
- getStaticValue,
- getStringIfConstant
- } = require("@eslint-community/eslint-utils");
- const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");
- const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
- const astUtils = require("./utils/ast-utils.js");
- const { isValidWithUnicodeFlag } = require("./utils/regular-expressions");
- const { parseStringLiteral, parseTemplateToken } = require("./utils/char-source");
/**
 * Splits the elements of a character class into runs of adjacent characters.
 *
 * `Character` elements extend the current run. A `CharacterClassRange`
 * contributes its `min` to the end of the current run and its `max` to the
 * start of the next one. Set-like elements (`CharacterSet`, `CharacterClass`,
 * `ClassStringDisjunction`, `ExpressionCharacterClass`) terminate the current
 * run. Any other element type is ignored.
 * @param {Object[]} nodes The elements of a character class.
 * @returns {IterableIterator<Object[]>} The non-empty runs, in source order.
 */
function *iterateCharacterSequence(nodes) {
    let pending = [];

    for (const node of nodes) {
        const { type } = node;

        if (type === "Character") {
            pending.push(node);
            continue;
        }

        if (type === "CharacterClassRange") {

            // The lower bound closes the current run; the upper bound opens the next.
            pending.push(node.min);
            yield pending;
            pending = [node.max];
            continue;
        }

        const endsRun =
            type === "CharacterSet" ||
            type === "CharacterClass" ||
            type === "ClassStringDisjunction" ||
            type === "ExpressionCharacterClass";

        if (endsRun && pending.length > 0) {
            yield pending;
            pending = [];
        }
    }

    if (pending.length > 0) {
        yield pending;
    }
}
/**
 * Tells whether a character was written as a `\u{...}` code point escape.
 * @param {Object} char Character node; only its `raw` source text is inspected.
 * @returns {boolean} `true` when `raw` is exactly `\u{` + hex digits + `}` (case-insensitive).
 */
function isUnicodeCodePointEscape(char) {
    const codePointEscape = /^\\u\{[\da-f]+\}$/iu;

    return codePointEscape.test(char.raw);
}
/**
 * Detectors for misleading character sequences, keyed by kind.
 *
 * Every detector is a generator that receives one run of characters and
 * yields each offending group as an array of the involved characters.
 * Entries of `chars` may be `null`; such entries never take part in a match,
 * except that `combiningClass` reads the preceding character from its second
 * argument (the unfiltered run) instead.
 */
const findCharacterSequences = {

    // Surrogate pair where neither half is written as a `\u{...}` escape.
    *surrogatePairWithoutUFlag(chars) {
        for (let i = 1; i < chars.length; i += 1) {
            const lead = chars[i - 1];
            const trail = chars[i];

            if (!lead || !trail) {
                continue;
            }
            if (!isSurrogatePair(lead.value, trail.value)) {
                continue;
            }
            if (isUnicodeCodePointEscape(lead) || isUnicodeCodePointEscape(trail)) {
                continue;
            }
            yield [lead, trail];
        }
    },

    // Surrogate pair where at least one half is written as a `\u{...}` escape.
    *surrogatePair(chars) {
        for (let i = 1; i < chars.length; i += 1) {
            const lead = chars[i - 1];
            const trail = chars[i];

            if (!lead || !trail) {
                continue;
            }
            if (!isSurrogatePair(lead.value, trail.value)) {
                continue;
            }
            if (isUnicodeCodePointEscape(lead) || isUnicodeCodePointEscape(trail)) {
                yield [lead, trail];
            }
        }
    },

    // Combining character that follows a non-combining character.
    *combiningClass(chars, unfilteredChars) {
        for (let i = 1; i < chars.length; i += 1) {

            // The base character is looked up in the unfiltered run.
            const base = unfilteredChars[i - 1];
            const mark = chars[i];

            if (!base || !mark) {
                continue;
            }
            if (isCombiningCharacter(mark.value) && !isCombiningCharacter(base.value)) {
                yield [base, mark];
            }
        }
    },

    // Emoji modifier that follows something which is not an emoji modifier.
    *emojiModifier(chars) {
        for (let i = 1; i < chars.length; i += 1) {
            const base = chars[i - 1];
            const modifier = chars[i];

            if (!base || !modifier) {
                continue;
            }
            if (isEmojiModifier(modifier.value) && !isEmojiModifier(base.value)) {
                yield [base, modifier];
            }
        }
    },

    // Two adjacent regional indicator symbols.
    *regionalIndicatorSymbol(chars) {
        for (let i = 1; i < chars.length; i += 1) {
            const first = chars[i - 1];
            const second = chars[i];

            if (!first || !second) {
                continue;
            }
            if (isRegionalIndicatorSymbol(second.value) && isRegionalIndicatorSymbol(first.value)) {
                yield [first, second];
            }
        }
    },

    // U+200D between two characters that are not U+200D; chained joins are merged into one group.
    *zwj(chars) {
        const JOINER = 0x200d;
        let run = null;

        // A joiner needs a neighbour on both sides, so the first and last index cannot match.
        for (let i = 1; i < chars.length - 1; i += 1) {
            const before = chars[i - 1];
            const joiner = chars[i];
            const after = chars[i + 1];

            const joinsTwo =
                before && joiner && after &&
                joiner.value === JOINER &&
                before.value !== JOINER &&
                after.value !== JOINER;

            if (!joinsTwo) {
                continue;
            }

            if (run && run.at(-1) === before) {

                // Continues the group that ended on `before`.
                run.push(joiner, after);
                continue;
            }

            if (run) {
                yield run;
            }
            run = chars.slice(i - 1, i + 2);
        }

        if (run) {
            yield run;
        }
    }
};

// Detector names in declaration order; each one is also a message id of the rule.
const kinds = Object.keys(findCharacterSequences);
/**
 * Resolves a node either to the `regex` data of a regex literal or to its
 * statically known value.
 * @param {Object|undefined} node The node to evaluate.
 * @param {Object} initialScope Scope handed to `getStaticValue` for the lookup.
 * @returns {{regex: Object}|Object|null} `{ regex }` for a regex literal;
 *      otherwise whatever `getStaticValue` returns, except that a result whose
 *      value is a `RegExp` instance is replaced by `null`. `null` as well when
 *      `node` is missing.
 */
function getStaticValueOrRegex(node, initialScope) {
    if (!node) {
        return null;
    }

    const isRegexLiteral = node.type === "Literal" && node.regex;

    if (isRegexLiteral) {
        return { regex: node.regex };
    }

    const evaluated = getStaticValue(node, initialScope);

    return evaluated?.value instanceof RegExp ? null : evaluated;
}
/**
 * Decides whether the source text of a character is an escape sequence that
 * does not simply spell out the character itself.
 * @param {Object} char Character node; `value` is its code point.
 * @param {string} charSource Source text the character was written as.
 * @returns {boolean} `false` when the text does not start with a backslash, or
 *      when it is only backslashes followed by the character itself;
 *      `true` for every other backslash-led text.
 */
function checkForAcceptableEscape(char, charSource) {
    const startsWithBackslash = charSource.startsWith("\\");

    if (!startsWithBackslash) {
        return false;
    }

    // Matches only when the whole text is backslashes followed by one final character.
    const trailing = /(?<=^\\+).$/su.exec(charSource);
    const literal = String.fromCodePoint(char.value);

    return trailing?.[0] !== literal;
}
/**
 * Applies `checkForAcceptableEscape` to a character of a pattern that was
 * written inside a string or template literal.
 * @param {Object} char Character node; `start` and `end` index into `codeUnits`
 *      (`end` is exclusive).
 * @param {string} nodeSource Source text of the literal.
 * @param {Object[]} codeUnits Entries with `start`/`end` offsets into `nodeSource`.
 * @returns {boolean} Result of `checkForAcceptableEscape` for the slice of
 *      `nodeSource` covered by the character's first through last entry.
 */
function checkForAcceptableEscapeInString(char, nodeSource, codeUnits) {
    const { start: sourceStart } = codeUnits[char.start];
    const { end: sourceEnd } = codeUnits[char.end - 1];

    return checkForAcceptableEscape(char, nodeSource.slice(sourceStart, sourceEnd));
}
- module.exports = {
- meta: {
- type: "problem",
- docs: {
- description: "Disallow characters which are made with multiple code points in character class syntax",
- recommended: true,
- url: "https://eslint.org/docs/latest/rules/no-misleading-character-class"
- },
- hasSuggestions: true,
- schema: [
- {
- type: "object",
- properties: {
- allowEscape: {
- type: "boolean",
- default: false
- }
- },
- additionalProperties: false
- }
- ],
- messages: {
- surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",
- surrogatePair: "Unexpected surrogate pair in character class.",
- combiningClass: "Unexpected combined character in character class.",
- emojiModifier: "Unexpected modified Emoji in character class.",
- regionalIndicatorSymbol: "Unexpected national flag in character class.",
- zwj: "Unexpected joined character sequence in character class.",
- suggestUnicodeFlag: "Add unicode 'u' flag to regex."
- }
- },
- create(context) {
- const allowEscape = context.options[0]?.allowEscape;
- const sourceCode = context.sourceCode;
- const parser = new RegExpParser();
- const checkedPatternNodes = new Set();
-
- function verify(node, pattern, flags, unicodeFixer) {
- let patternNode;
- try {
- patternNode = parser.parsePattern(
- pattern,
- 0,
- pattern.length,
- {
- unicode: flags.includes("u"),
- unicodeSets: flags.includes("v")
- }
- );
- } catch {
-
- return;
- }
- let codeUnits = null;
-
- function isAcceptableEscapeSequence(char) {
- if (node.type === "Literal" && node.regex) {
- return checkForAcceptableEscape(char, char.raw);
- }
- if (node.type === "Literal" && typeof node.value === "string") {
- const nodeSource = node.raw;
- codeUnits ??= parseStringLiteral(nodeSource);
- return checkForAcceptableEscapeInString(char, nodeSource, codeUnits);
- }
- if (astUtils.isStaticTemplateLiteral(node)) {
- const nodeSource = sourceCode.getText(node);
- codeUnits ??= parseTemplateToken(nodeSource);
- return checkForAcceptableEscapeInString(char, nodeSource, codeUnits);
- }
- return false;
- }
- const foundKindMatches = new Map();
- visitRegExpAST(patternNode, {
- onCharacterClassEnter(ccNode) {
- for (const unfilteredChars of iterateCharacterSequence(ccNode.elements)) {
- let chars;
- if (allowEscape) {
-
- chars = unfilteredChars.map(char => (isAcceptableEscapeSequence(char) ? null : char));
- } else {
- chars = unfilteredChars;
- }
- for (const kind of kinds) {
- const matches = findCharacterSequences[kind](chars, unfilteredChars);
- if (foundKindMatches.has(kind)) {
- foundKindMatches.get(kind).push(...matches);
- } else {
- foundKindMatches.set(kind, [...matches]);
- }
- }
- }
- }
- });
-
- function getNodeReportLocations(matches) {
- if (!astUtils.isStaticTemplateLiteral(node) && node.type !== "Literal") {
- return matches.length ? [node.loc] : [];
- }
- return matches.map(chars => {
- const firstIndex = chars[0].start;
- const lastIndex = chars.at(-1).end - 1;
- let start;
- let end;
- if (node.type === "TemplateLiteral") {
- const source = sourceCode.getText(node);
- const offset = node.range[0];
- codeUnits ??= parseTemplateToken(source);
- start = offset + codeUnits[firstIndex].start;
- end = offset + codeUnits[lastIndex].end;
- } else if (typeof node.value === "string") {
- const source = node.raw;
- const offset = node.range[0];
- codeUnits ??= parseStringLiteral(source);
- start = offset + codeUnits[firstIndex].start;
- end = offset + codeUnits[lastIndex].end;
- } else {
- const offset = node.range[0] + 1;
- start = offset + firstIndex;
- end = offset + lastIndex + 1;
- }
- return {
- start: sourceCode.getLocFromIndex(start),
- end: sourceCode.getLocFromIndex(end)
- };
- });
- }
- for (const [kind, matches] of foundKindMatches) {
- let suggest;
- if (kind === "surrogatePairWithoutUFlag") {
- suggest = [{
- messageId: "suggestUnicodeFlag",
- fix: unicodeFixer
- }];
- }
- const locs = getNodeReportLocations(matches);
- for (const loc of locs) {
- context.report({
- node,
- loc,
- messageId: kind,
- suggest
- });
- }
- }
- }
- return {
- "Literal[regex]"(node) {
- if (checkedPatternNodes.has(node)) {
- return;
- }
- verify(node, node.regex.pattern, node.regex.flags, fixer => {
- if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, node.regex.pattern)) {
- return null;
- }
- return fixer.insertTextAfter(node, "u");
- });
- },
- "Program"(node) {
- const scope = sourceCode.getScope(node);
- const tracker = new ReferenceTracker(scope);
-
- for (const { node: refNode } of tracker.iterateGlobalReferences({
- RegExp: { [CALL]: true, [CONSTRUCT]: true }
- })) {
- let pattern, flags;
- const [patternNode, flagsNode] = refNode.arguments;
- const evaluatedPattern = getStaticValueOrRegex(patternNode, scope);
- if (!evaluatedPattern) {
- continue;
- }
- if (flagsNode) {
- if (evaluatedPattern.regex) {
- pattern = evaluatedPattern.regex.pattern;
- checkedPatternNodes.add(patternNode);
- } else {
- pattern = String(evaluatedPattern.value);
- }
- flags = getStringIfConstant(flagsNode, scope);
- } else {
- if (evaluatedPattern.regex) {
- continue;
- }
- pattern = String(evaluatedPattern.value);
- flags = "";
- }
- if (typeof flags === "string") {
- verify(patternNode, pattern, flags, fixer => {
- if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) {
- return null;
- }
- if (refNode.arguments.length === 1) {
- const penultimateToken = sourceCode.getLastToken(refNode, { skip: 1 });
- return fixer.insertTextAfter(
- penultimateToken,
- astUtils.isCommaToken(penultimateToken)
- ? ' "u",'
- : ', "u"'
- );
- }
- if ((flagsNode.type === "Literal" && typeof flagsNode.value === "string") || flagsNode.type === "TemplateLiteral") {
- const range = [flagsNode.range[0], flagsNode.range[1] - 1];
- return fixer.insertTextAfterRange(range, "u");
- }
- return null;
- });
- }
- }
- }
- };
- }
- };
|