Skip to content

Commit 7f029fa

Browse files
fix: handle whitespace
1 parent 94fbb00 commit 7f029fa

File tree

1 file changed

+96
-26
lines changed

1 file changed

+96
-26
lines changed

crates/config/src/rule/selector.rs

Lines changed: 96 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
<pseudo-class-selector> = ':' <ident-token> [ '(' <selector-list> ')' ]?
3737
*/
3838
use super::Rule;
39+
use ast_grep_core::ops::Any;
3940
use thiserror::Error;
4041

4142
// Inspired by CSS Selector, see
@@ -44,7 +45,7 @@ use thiserror::Error;
4445
#[derive(Debug, Clone, PartialEq)]
4546
enum Token<'a> {
4647
Identifier(&'a str),
47-
/// + ~ >
48+
/// + ~ > or space ` `
4849
Combinator(char),
4950
/// .
5051
ClassDot,
@@ -58,14 +59,56 @@ enum Token<'a> {
5859
Comma,
5960
}
6061

61-
pub fn parse_selector(source: &str) -> Rule {
62+
fn parse_selector(source: &str) -> Result<Rule, SelectorError> {
63+
let mut input = Input::new(source);
64+
let ret = try_parse_selector(&mut input)?;
65+
if !input.is_empty() {
66+
return Err(SelectorError::UnexpectedToken);
67+
}
68+
Ok(ret)
69+
}
70+
71+
fn try_parse_selector<'a>(input: &mut Input<'a>) -> Result<Rule, SelectorError> {
72+
let mut rules = vec![];
73+
while !input.is_empty() {
74+
let complex_selector = parse_complex_selector(input)?;
75+
rules.push(complex_selector);
76+
if let Some(Token::Comma) = input.peek()? {
77+
input.next()?; // consume the comma
78+
} else if !input.is_empty() {
79+
break;
80+
}
81+
}
82+
Ok(Rule::Any(Any::new(rules)))
83+
}
84+
85+
fn parse_complex_selector<'a>(input: &mut Input<'a>) -> Result<Rule, SelectorError> {
86+
let mut rule = parse_compound_selector(input)?;
87+
loop {
88+
let combinator = try_parse_combinator(input)?;
89+
}
90+
Ok(rule)
91+
}
92+
93+
fn try_parse_combinator<'a>(input: &mut Input<'a>) -> Result<Option<char>, SelectorError> {
94+
let Some(Token::Combinator(c)) = input.peek()? else {
95+
return Ok(None);
96+
};
97+
let c = *c;
98+
input.next()?; // consume the combinator
99+
Ok(Some(c))
100+
}
101+
102+
fn parse_compound_selector<'a>(input: &mut Input<'a>) -> Result<Rule, SelectorError> {
62103
todo!()
63104
}
64105

65106
#[derive(Debug, Error)]
66107
enum SelectorError {
67108
#[error("Illegal character {0} encountered")]
68109
IllegalCharacter(char),
110+
#[error("Unexpected token")]
111+
UnexpectedToken,
69112
}
70113

71114
struct Input<'a> {
@@ -81,30 +124,53 @@ impl<'a> Input<'a> {
81124
}
82125
}
83126

127+
fn is_empty(&self) -> bool {
128+
self.source.is_empty() && self.lookahead.is_none()
129+
}
130+
131+
fn consume_whitespace(&mut self) {
132+
self.source = self.source.trim_start();
133+
}
134+
84135
fn do_next(&mut self) -> Result<Option<Token<'a>>, SelectorError> {
85136
if self.source.is_empty() {
86137
return Ok(None);
87138
}
88-
let (next_token, step) = match self.source.as_bytes()[0] as char {
89-
c @ ('+' | '~' | '>') => (Token::Combinator(c), 1),
90-
'.' => (Token::ClassDot, 1),
91-
':' => (Token::PseudoColon, 1),
92-
'(' => (Token::LeftParen, 1),
93-
')' => (Token::RightParen, 1),
94-
',' => (Token::Comma, 1),
139+
let (next_token, step, need_trim) = match self.source.as_bytes()[0] as char {
140+
' ' => {
141+
let len = self
142+
.source
143+
.find(|c: char| !c.is_whitespace())
144+
.unwrap_or(self.source.len());
145+
let next_char = self.source.as_bytes()[len] as char;
146+
if matches!(next_char, '+' | '~' | '>') {
147+
self.consume_whitespace();
148+
return self.do_next(); // skip whitespace
149+
}
150+
(Token::Combinator(' '), len, true)
151+
}
152+
c @ ('+' | '~' | '>') => (Token::Combinator(c), 1, true),
153+
'.' => (Token::ClassDot, 1, false),
154+
':' => (Token::PseudoColon, 1, false),
155+
'(' => (Token::LeftParen, 1, true),
156+
')' => (Token::RightParen, 1, false),
157+
',' => (Token::Comma, 1, true),
95158
'a'..='z' | 'A'..='Z' | '_' | '-' => {
96159
let len = self
97160
.source
98161
.find(|c| !matches!(c, 'a'..='z' | 'A'..='Z' | '_' | '-'))
99162
.unwrap_or(self.source.len());
100163
let ident = &self.source[..len];
101-
(Token::Identifier(ident), len)
164+
(Token::Identifier(ident), len, false)
102165
}
103166
c => {
104167
return Err(SelectorError::IllegalCharacter(c));
105168
}
106169
};
107-
self.source = self.source[step..].trim_start();
170+
self.source = &self.source[step..];
171+
if need_trim {
172+
self.consume_whitespace();
173+
}
108174
Ok(Some(next_token))
109175
}
110176

@@ -140,21 +206,24 @@ mod test {
140206
#[test]
141207
fn test_valid_tokens() -> Result<(), SelectorError> {
142208
let tokens = input_to_tokens("call_expression + statement > .body :has, identifier")?;
143-
assert_eq!(
144-
tokens,
145-
vec![
146-
Token::Identifier("call_expression"),
147-
Token::Combinator('+'),
148-
Token::Identifier("statement"),
149-
Token::Combinator('>'),
150-
Token::ClassDot,
151-
Token::Identifier("body"),
152-
Token::PseudoColon,
153-
Token::Identifier("has"),
154-
Token::Comma,
155-
Token::Identifier("identifier"),
156-
]
157-
);
209+
let expected = vec![
210+
Token::Identifier("call_expression"),
211+
Token::Combinator('+'),
212+
Token::Identifier("statement"),
213+
Token::Combinator('>'),
214+
Token::ClassDot,
215+
Token::Identifier("body"),
216+
Token::Combinator(' '),
217+
Token::PseudoColon,
218+
Token::Identifier("has"),
219+
Token::Comma,
220+
Token::Identifier("identifier"),
221+
];
222+
assert_eq!(tokens, expected);
223+
// Test with extra whitespace
224+
let tokens =
225+
input_to_tokens(" call_expression + statement > .body :has, identifier ")?;
226+
assert_eq!(tokens, expected);
158227
Ok(())
159228
}
160229

@@ -166,6 +235,7 @@ mod test {
166235
input.next().unwrap(),
167236
Some(Token::Identifier("call_expression"))
168237
);
238+
assert_eq!(input.next().unwrap(), Some(Token::Combinator(' ')));
169239
assert!(matches!(
170240
input.next(),
171241
Err(SelectorError::IllegalCharacter('$'))

0 commit comments

Comments
 (0)