Skip to content

Commit 10e77d7

Browse files
committed
fix(linter): improve diagnostics for no-control-regex (#10959)
closes #10950, by improving the diagnostic when (deprecated) octal escapes are used
1 parent 53329f8 commit 10e77d7

File tree

3 files changed

+82
-17
lines changed

3 files changed

+82
-17
lines changed

crates/oxc_linter/src/rules/eslint/no_control_regex.rs

Lines changed: 78 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,88 @@
1+
use std::fmt::Write;
2+
13
use itertools::Itertools as _;
4+
25
use oxc_diagnostics::OxcDiagnostic;
36
use oxc_macros::declare_oxc_lint;
47
use oxc_regular_expression::{
5-
ast::{CapturingGroup, Character, Pattern},
8+
ast::{CapturingGroup, Character, CharacterKind, Pattern},
69
visit::{Visit, walk},
710
};
811
use oxc_span::Span;
912

1013
use crate::{AstNode, context::LintContext, rule::Rule, utils::run_on_regex_node};
1114

12-
fn no_control_regex_diagnostic(count: usize, regex: &str, span: Span) -> OxcDiagnostic {
15+
fn no_control_regex_diagnostic(control_chars: &[Character], span: Span) -> OxcDiagnostic {
16+
let count = control_chars.len();
1317
debug_assert!(count > 0);
14-
let (message, help) = if count == 1 {
15-
("Unexpected control character", format!("'{regex}' is not a valid control character."))
16-
} else {
17-
("Unexpected control characters", format!("'{regex}' are not valid control characters."))
18-
};
1918

20-
OxcDiagnostic::warn(message).with_help(help).with_label(span)
21-
}
19+
let mut octal_chars = Vec::new();
20+
let mut null_chars = Vec::new();
21+
let mut other_chars = Vec::new();
2222

23+
for ch in control_chars {
24+
match ch.kind {
25+
CharacterKind::Octal1 | CharacterKind::Octal2 | CharacterKind::Octal3 => {
26+
octal_chars.push(ch);
27+
}
28+
CharacterKind::Null => {
29+
null_chars.push(ch);
30+
}
31+
_ => {
32+
other_chars.push(ch);
33+
}
34+
}
35+
}
36+
37+
let mut help = String::new();
38+
39+
if !other_chars.is_empty() {
40+
let regexes = other_chars.iter().join(", ");
41+
writeln!(
42+
help,
43+
"'{regexes}' {} not {}valid control character{}.",
44+
if other_chars.len() > 1 { "are" } else { "is" },
45+
if other_chars.len() > 1 { "" } else { "a " },
46+
if other_chars.len() > 1 { "s" } else { "" },
47+
)
48+
.unwrap();
49+
}
50+
51+
if !octal_chars.is_empty() {
52+
let regexes = octal_chars.iter().join(", ");
53+
writeln!(
54+
help,
55+
"'{regexes}' {} not {}valid control character{}. They look like backreferences, but there {} no corresponding capture group{}. If you are trying to match the octal character, consider using '\\xNN' or '\\u00NN' instead.",
56+
if octal_chars.len() > 1 { "are" } else { "is" },
57+
if octal_chars.len() > 1 { "" } else { "a " },
58+
if octal_chars.len() > 1 { "s" } else { "" },
59+
if octal_chars.len() > 1 { "are" } else { "is" },
60+
if octal_chars.len() > 1 { "s" } else { "" }
61+
).unwrap();
62+
}
63+
64+
if !null_chars.is_empty() {
65+
writeln!(
66+
help,
67+
"'\\0' matches the null character (U+0000), which is a control character. If you intend to match the null character, consider using '\\x00' or '\\u0000' for clarity."
68+
).unwrap();
69+
}
70+
71+
debug_assert!(!help.is_empty());
72+
debug_assert!(help.chars().last().is_some_and(|char| char == '\n'));
73+
74+
if !help.is_empty() {
75+
help.truncate(help.len() - 1);
76+
}
77+
78+
OxcDiagnostic::warn(if count > 1 {
79+
"Unexpected control characters"
80+
} else {
81+
"Unexpected control character"
82+
})
83+
.with_help(help)
84+
.with_label(span)
85+
}
2386
#[derive(Debug, Default, Clone)]
2487
pub struct NoControlRegex;
2588

@@ -84,9 +147,7 @@ fn check_pattern(context: &LintContext, pattern: &Pattern, span: Span) {
84147
finder.visit_pattern(pattern);
85148

86149
if !finder.control_chars.is_empty() {
87-
let num_control_chars = finder.control_chars.len();
88-
let violations = finder.control_chars.into_iter().map(|c| c.to_string()).join(", ");
89-
context.diagnostic(no_control_regex_diagnostic(num_control_chars, &violations, span));
150+
context.diagnostic(no_control_regex_diagnostic(&finder.control_chars, span));
90151
}
91152
}
92153

@@ -152,7 +213,7 @@ mod tests {
152213
use super::*;
153214
use crate::tester::Tester;
154215

155-
#[test]
216+
#[test] //
156217
fn test_hex_literals() {
157218
Tester::new(
158219
NoControlRegex::NAME,
@@ -298,6 +359,10 @@ mod tests {
298359
r"/\x0d/u",
299360
r"/\u{09}/u",
300361
r"/\x09/u",
362+
r"/\0\1\2/",
363+
r"/\x1f\2/",
364+
r"/\x1f\0/",
365+
r"/\x1f\0\2/",
301366
],
302367
)
303368
.test_and_snapshot();
1.93 KB
Binary file not shown.

crates/oxc_linter/src/snapshots/eslint_no_control_regex@capture-group-indexing.snap

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,25 @@ source: crates/oxc_linter/src/tester.rs
66
1 │ const r = /\0/;
77
· ────
88
╰────
9-
help: '\0' is not a valid control character.
9+
help: '\0' matches the null character (U+0000), which is a control character. If you intend to match the null character, consider using '\x00' or '\u0000' for clarity.
1010

1111
eslint(no-control-regex): Unexpected control character
1212
╭─[no_control_regex.tsx:1:11]
1313
1 │ const r = /[a-z]\1/;
1414
· ─────────
1515
╰────
16-
help: '\1' is not a valid control character.
16+
help: '\1' is not a valid control character. They look like backreferences, but there is no corresponding capture group. If you are trying to match the octal character, consider using '\xNN' or '\u00NN' instead.
1717

1818
eslint(no-control-regex): Unexpected control character
1919
╭─[no_control_regex.tsx:1:11]
2020
1 │ const r = /([a-z])\2/;
2121
· ───────────
2222
╰────
23-
help: '\2' is not a valid control character.
23+
help: '\2' is not a valid control character. They look like backreferences, but there is no corresponding capture group. If you are trying to match the octal character, consider using '\xNN' or '\u00NN' instead.
2424

2525
eslint(no-control-regex): Unexpected control character
2626
╭─[no_control_regex.tsx:1:11]
2727
1 │ const r = /([a-z])\0/;
2828
· ───────────
2929
╰────
30-
help: '\0' is not a valid control character.
30+
help: '\0' matches the null character (U+0000), which is a control character. If you intend to match the null character, consider using '\x00' or '\u0000' for clarity.

0 commit comments

Comments
 (0)