3636<pseudo-class-selector> = ':' <ident-token> [ '(' <selector-list> ')' ]?
3737*/
3838use super :: Rule ;
39+ use ast_grep_core:: ops:: Any ;
3940use thiserror:: Error ;
4041
4142// Inspired by CSS Selector, see
@@ -44,7 +45,7 @@ use thiserror::Error;
4445#[ derive( Debug , Clone , PartialEq ) ]
4546enum Token < ' a > {
4647 Identifier ( & ' a str ) ,
47- /// + ~ >
48+ /// + ~ > or space ` `
4849 Combinator ( char ) ,
4950 /// .
5051 ClassDot ,
@@ -58,14 +59,56 @@ enum Token<'a> {
5859 Comma ,
5960}
6061
61- pub fn parse_selector ( source : & str ) -> Rule {
62+ fn parse_selector ( source : & str ) -> Result < Rule , SelectorError > {
63+ let mut input = Input :: new ( source) ;
64+ let ret = try_parse_selector ( & mut input) ?;
65+ if !input. is_empty ( ) {
66+ return Err ( SelectorError :: UnexpectedToken ) ;
67+ }
68+ Ok ( ret)
69+ }
70+
71+ fn try_parse_selector < ' a > ( input : & mut Input < ' a > ) -> Result < Rule , SelectorError > {
72+ let mut rules = vec ! [ ] ;
73+ while !input. is_empty ( ) {
74+ let complex_selector = parse_complex_selector ( input) ?;
75+ rules. push ( complex_selector) ;
76+ if let Some ( Token :: Comma ) = input. peek ( ) ? {
77+ input. next ( ) ?; // consume the comma
78+ } else if !input. is_empty ( ) {
79+ break ;
80+ }
81+ }
82+ Ok ( Rule :: Any ( Any :: new ( rules) ) )
83+ }
84+
85+ fn parse_complex_selector < ' a > ( input : & mut Input < ' a > ) -> Result < Rule , SelectorError > {
86+ let mut rule = parse_compound_selector ( input) ?;
87+ loop {
88+ let combinator = try_parse_combinator ( input) ?;
89+ }
90+ Ok ( rule)
91+ }
92+
93+ fn try_parse_combinator < ' a > ( input : & mut Input < ' a > ) -> Result < Option < char > , SelectorError > {
94+ let Some ( Token :: Combinator ( c) ) = input. peek ( ) ? else {
95+ return Ok ( None ) ;
96+ } ;
97+ let c = * c;
98+ input. next ( ) ?; // consume the combinator
99+ Ok ( Some ( c) )
100+ }
101+
102+ fn parse_compound_selector < ' a > ( input : & mut Input < ' a > ) -> Result < Rule , SelectorError > {
62103 todo ! ( )
63104}
64105
65106#[ derive( Debug , Error ) ]
66107enum SelectorError {
67108 #[ error( "Illegal character {0} encountered" ) ]
68109 IllegalCharacter ( char ) ,
110+ #[ error( "Unexpected token" ) ]
111+ UnexpectedToken ,
69112}
70113
71114struct Input < ' a > {
@@ -81,30 +124,53 @@ impl<'a> Input<'a> {
81124 }
82125 }
83126
127+ fn is_empty ( & self ) -> bool {
128+ self . source . is_empty ( ) && self . lookahead . is_none ( )
129+ }
130+
131+ fn consume_whitespace ( & mut self ) {
132+ self . source = self . source . trim_start ( ) ;
133+ }
134+
84135 fn do_next ( & mut self ) -> Result < Option < Token < ' a > > , SelectorError > {
85136 if self . source . is_empty ( ) {
86137 return Ok ( None ) ;
87138 }
88- let ( next_token, step) = match self . source . as_bytes ( ) [ 0 ] as char {
89- c @ ( '+' | '~' | '>' ) => ( Token :: Combinator ( c) , 1 ) ,
90- '.' => ( Token :: ClassDot , 1 ) ,
91- ':' => ( Token :: PseudoColon , 1 ) ,
92- '(' => ( Token :: LeftParen , 1 ) ,
93- ')' => ( Token :: RightParen , 1 ) ,
94- ',' => ( Token :: Comma , 1 ) ,
139+ let ( next_token, step, need_trim) = match self . source . as_bytes ( ) [ 0 ] as char {
140+ ' ' => {
141+ let len = self
142+ . source
143+ . find ( |c : char | !c. is_whitespace ( ) )
144+ . unwrap_or ( self . source . len ( ) ) ;
145+ let next_char = self . source . as_bytes ( ) [ len] as char ;
146+ if matches ! ( next_char, '+' | '~' | '>' ) {
147+ self . consume_whitespace ( ) ;
148+ return self . do_next ( ) ; // skip whitespace
149+ }
150+ ( Token :: Combinator ( ' ' ) , len, true )
151+ }
152+ c @ ( '+' | '~' | '>' ) => ( Token :: Combinator ( c) , 1 , true ) ,
153+ '.' => ( Token :: ClassDot , 1 , false ) ,
154+ ':' => ( Token :: PseudoColon , 1 , false ) ,
155+ '(' => ( Token :: LeftParen , 1 , true ) ,
156+ ')' => ( Token :: RightParen , 1 , false ) ,
157+ ',' => ( Token :: Comma , 1 , true ) ,
95158 'a' ..='z' | 'A' ..='Z' | '_' | '-' => {
96159 let len = self
97160 . source
98161 . find ( |c| !matches ! ( c, 'a' ..='z' | 'A' ..='Z' | '_' | '-' ) )
99162 . unwrap_or ( self . source . len ( ) ) ;
100163 let ident = & self . source [ ..len] ;
101- ( Token :: Identifier ( ident) , len)
164+ ( Token :: Identifier ( ident) , len, false )
102165 }
103166 c => {
104167 return Err ( SelectorError :: IllegalCharacter ( c) ) ;
105168 }
106169 } ;
107- self . source = self . source [ step..] . trim_start ( ) ;
170+ self . source = & self . source [ step..] ;
171+ if need_trim {
172+ self . consume_whitespace ( ) ;
173+ }
108174 Ok ( Some ( next_token) )
109175 }
110176
@@ -140,21 +206,24 @@ mod test {
/// Checks the token stream for a selector that uses every combinator kind,
/// once as written and once with leading and trailing whitespace.
#[test]
fn test_valid_tokens() -> Result<(), SelectorError> {
    let expected = vec![
        Token::Identifier("call_expression"),
        Token::Combinator('+'),
        Token::Identifier("statement"),
        Token::Combinator('>'),
        Token::ClassDot,
        Token::Identifier("body"),
        Token::Combinator(' '),
        Token::PseudoColon,
        Token::Identifier("has"),
        Token::Comma,
        Token::Identifier("identifier"),
    ];
    let sources = [
        "call_expression + statement > .body :has, identifier",
        // Test with extra whitespace
        " call_expression + statement > .body :has, identifier ",
    ];
    for source in sources {
        let tokens = input_to_tokens(source)?;
        assert_eq!(tokens, expected);
    }
    Ok(())
}
160229
@@ -166,6 +235,7 @@ mod test {
166235 input. next( ) . unwrap( ) ,
167236 Some ( Token :: Identifier ( "call_expression" ) )
168237 ) ;
238+ assert_eq ! ( input. next( ) . unwrap( ) , Some ( Token :: Combinator ( ' ' ) ) ) ;
169239 assert ! ( matches!(
170240 input. next( ) ,
171241 Err ( SelectorError :: IllegalCharacter ( '$' ) )
0 commit comments