|
| 1 | +use std::fmt::Write; |
| 2 | + |
1 | 3 | use itertools::Itertools as _; |
| 4 | + |
2 | 5 | use oxc_diagnostics::OxcDiagnostic; |
3 | 6 | use oxc_macros::declare_oxc_lint; |
4 | 7 | use oxc_regular_expression::{ |
5 | | - ast::{CapturingGroup, Character, Pattern}, |
| 8 | + ast::{CapturingGroup, Character, CharacterKind, Pattern}, |
6 | 9 | visit::{Visit, walk}, |
7 | 10 | }; |
8 | 11 | use oxc_span::Span; |
9 | 12 |
|
10 | 13 | use crate::{AstNode, context::LintContext, rule::Rule, utils::run_on_regex_node}; |
11 | 14 |
|
12 | | -fn no_control_regex_diagnostic(count: usize, regex: &str, span: Span) -> OxcDiagnostic { |
| 15 | +fn no_control_regex_diagnostic(control_chars: &[Character], span: Span) -> OxcDiagnostic { |
| 16 | + let count = control_chars.len(); |
13 | 17 | debug_assert!(count > 0); |
14 | | - let (message, help) = if count == 1 { |
15 | | - ("Unexpected control character", format!("'{regex}' is not a valid control character.")) |
16 | | - } else { |
17 | | - ("Unexpected control characters", format!("'{regex}' are not valid control characters.")) |
18 | | - }; |
19 | 18 |
|
20 | | - OxcDiagnostic::warn(message).with_help(help).with_label(span) |
21 | | -} |
| 19 | + let mut octal_chars = Vec::new(); |
| 20 | + let mut null_chars = Vec::new(); |
| 21 | + let mut other_chars = Vec::new(); |
22 | 22 |
|
| 23 | + for ch in control_chars { |
| 24 | + match ch.kind { |
| 25 | + CharacterKind::Octal1 | CharacterKind::Octal2 | CharacterKind::Octal3 => { |
| 26 | + octal_chars.push(ch); |
| 27 | + } |
| 28 | + CharacterKind::Null => { |
| 29 | + null_chars.push(ch); |
| 30 | + } |
| 31 | + _ => { |
| 32 | + other_chars.push(ch); |
| 33 | + } |
| 34 | + } |
| 35 | + } |
| 36 | + |
| 37 | + let mut help = String::new(); |
| 38 | + |
| 39 | + if !other_chars.is_empty() { |
| 40 | + let regexes = other_chars.iter().join(", "); |
| 41 | + writeln!( |
| 42 | + help, |
| 43 | + "'{regexes}' {} not {}valid control character{}.", |
| 44 | + if other_chars.len() > 1 { "are" } else { "is" }, |
| 45 | + if other_chars.len() > 1 { "" } else { "a " }, |
| 46 | + if other_chars.len() > 1 { "s" } else { "" }, |
| 47 | + ) |
| 48 | + .unwrap(); |
| 49 | + } |
| 50 | + |
| 51 | + if !octal_chars.is_empty() { |
| 52 | + let regexes = octal_chars.iter().join(", "); |
| 53 | + writeln!( |
| 54 | + help, |
| 55 | + "'{regexes}' {} not {}valid control character{}. They look like backreferences, but there {} no corresponding capture group{}. If you are trying to match the octal character, consider using '\\xNN' or '\\u00NN' instead.", |
| 56 | + if octal_chars.len() > 1 { "are" } else { "is" }, |
| 57 | + if octal_chars.len() > 1 { "" } else { "a " }, |
| 58 | + if octal_chars.len() > 1 { "s" } else { "" }, |
| 59 | + if octal_chars.len() > 1 { "are" } else { "is" }, |
| 60 | + if octal_chars.len() > 1 { "s" } else { "" } |
| 61 | + ).unwrap(); |
| 62 | + } |
| 63 | + |
| 64 | + if !null_chars.is_empty() { |
| 65 | + writeln!( |
| 66 | + help, |
| 67 | + "'\\0' matches the null character (U+0000), which is a control character. If you intend to match the null character, consider using '\\x00' or '\\u0000' for clarity." |
| 68 | + ).unwrap(); |
| 69 | + } |
| 70 | + |
| 71 | + debug_assert!(!help.is_empty()); |
| 72 | + debug_assert!(help.chars().last().is_some_and(|char| char == '\n')); |
| 73 | + |
| 74 | + if !help.is_empty() { |
| 75 | + help.truncate(help.len() - 1); |
| 76 | + } |
| 77 | + |
| 78 | + OxcDiagnostic::warn(if count > 1 { |
| 79 | + "Unexpected control characters" |
| 80 | + } else { |
| 81 | + "Unexpected control character" |
| 82 | + }) |
| 83 | + .with_help(help) |
| 84 | + .with_label(span) |
| 85 | +} |
23 | 86 | #[derive(Debug, Default, Clone)] |
24 | 87 | pub struct NoControlRegex; |
25 | 88 |
|
@@ -84,9 +147,7 @@ fn check_pattern(context: &LintContext, pattern: &Pattern, span: Span) { |
84 | 147 | finder.visit_pattern(pattern); |
85 | 148 |
|
86 | 149 | if !finder.control_chars.is_empty() { |
87 | | - let num_control_chars = finder.control_chars.len(); |
88 | | - let violations = finder.control_chars.into_iter().map(|c| c.to_string()).join(", "); |
89 | | - context.diagnostic(no_control_regex_diagnostic(num_control_chars, &violations, span)); |
| 150 | + context.diagnostic(no_control_regex_diagnostic(&finder.control_chars, span)); |
90 | 151 | } |
91 | 152 | } |
92 | 153 |
|
@@ -152,7 +213,7 @@ mod tests { |
152 | 213 | use super::*; |
153 | 214 | use crate::tester::Tester; |
154 | 215 |
|
155 | | - #[test] |
| 216 | + #[test] // |
156 | 217 | fn test_hex_literals() { |
157 | 218 | Tester::new( |
158 | 219 | NoControlRegex::NAME, |
@@ -298,6 +359,10 @@ mod tests { |
298 | 359 | r"/\x0d/u", |
299 | 360 | r"/\u{09}/u", |
300 | 361 | r"/\x09/u", |
| 362 | + r"/\0\1\2/", |
| 363 | + r"/\x1f\2/", |
| 364 | + r"/\x1f\0/", |
| 365 | + r"/\x1f\0\2/", |
301 | 366 | ], |
302 | 367 | ) |
303 | 368 | .test_and_snapshot(); |
|
0 commit comments