@@ -203,9 +203,10 @@ def split(cls, tokens: List[Token], tree: SyntaxTree) -> Tuple[List[Token], List
203203 (['SELECT' ], KeywordSelectSplitter .split_select ),
204204 (['FROM' ], cls ._split_from ),
205205 (['WHERE' ], KeywordWhereSplitter .split_where ),
206- (['ORDER' ], cls ._split_order ),
207- (['GROUP' ], cls ._split_split ),
208- (['HAVING' ], cls ._split_having ),
206+ (['ORDER' ], cls ._split_orderby ),
207+ (['GROUP' ], cls ._split_groupby ),
208+ (['HAVING' ], KeywordHavingSplitter .split_having ),
209+ (['LIMIT' ], cls ._split_limit ),
209210 (['CASE' ], cls ._split_case ),
210211 (['WHEN' ], cls ._split_when ),
211212 (['INNER' , 'LEFT' , 'RIGHT' , 'FULL' , 'CROSS' , 'OUTER' , 'JOIN' ], KeywordJoinSplitter .split_join ),
@@ -220,6 +221,73 @@ def split(cls, tokens: List[Token], tree: SyntaxTree) -> Tuple[List[Token], List
220221
221222 return tokens , [], []
222223
224+ @classmethod
def _split_groupby(cls, tokens: List[Token]) -> Tuple[List[Token], List[List[Token]], List[Token]]:
    """Splits a GROUP BY sequence into its keyword prefix, its body and the remainder.

    Args:
        tokens: token list whose first two items are taken as the clause
            prefix (presumably ``GROUP`` and ``BY`` -- confirm against the
            dispatcher in ``split``).

    Returns:
        A 3-tuple ``(prefix, [body], rest)``:
          * ``prefix`` is ``tokens[0:2]``, or ``tokens[0:3]`` when a stopper
            was found and the third token is a left bracket;
          * ``[body]`` is a one-element list holding the tokens between the
            prefix and the stopper;
          * ``rest`` starts at the stopper keyword, or is empty when no
            stopper was found.
    """
    keyword_kind = Token.KEYWORD
    opener = Token(word='GROUP', kind=keyword_kind)
    terminators = [
        Token(word=name, kind=keyword_kind)
        for name in ('SELECT', 'WHERE', 'ORDER', 'HAVING', 'LIMIT')
    ]

    # Scan everything after the leading keyword; ``pos`` is the index into
    # ``tokens`` itself.  Only tokens at bracket depth zero are considered.
    open_groups = 1
    depth = 0
    for pos, current in enumerate(tokens[1:], start=1):
        if current.kind == Token.BRACKET_LEFT:
            depth += 1
        if current.kind == Token.BRACKET_RIGHT:
            depth -= 1

        if depth != 0:
            continue

        # A further top-level GROUP disables the stopper check below.
        if current == opener:
            open_groups += 1

        if open_groups != 1 or current not in terminators:
            continue

        # A left bracket directly after the two-token prefix is kept in the prefix.
        starts_with_bracket = len(tokens) >= 3 and tokens[2].kind == Token.BRACKET_LEFT
        prefix_len = 3 if starts_with_bracket else 2
        return tokens[0:prefix_len], [tokens[prefix_len:pos]], tokens[pos:]

    # No stopper found: everything after the two-token prefix is the body.
    return tokens[0:2], [tokens[2:]], []
254+
255+ @classmethod
def _split_orderby(cls, tokens: List[Token]) -> Tuple[List[Token], List[List[Token]], List[Token]]:
    """Splits an ORDER BY sequence into its keyword prefix, its body and the remainder.

    Args:
        tokens: token list whose first two items are taken as the clause
            prefix (presumably ``ORDER`` and ``BY`` -- confirm against the
            dispatcher in ``split``).

    Returns:
        A 3-tuple ``(prefix, [body], rest)``:
          * ``prefix`` is ``tokens[0:2]``, or ``tokens[0:3]`` when a stopper
            was found and the third token is a left bracket;
          * ``[body]`` is a one-element list holding the tokens between the
            prefix and the stopper;
          * ``rest`` starts at the stopper keyword, or is empty when no
            stopper was found.
    """
    order_keyword = Token(word='ORDER', kind=Token.KEYWORD)
    stop_words = ('SELECT', 'WHERE', 'GROUP', 'HAVING', 'LIMIT')
    stop_tokens = [Token(word=word, kind=Token.KEYWORD) for word in stop_words]

    # Explores tokens until ORDER is closed by a following top-level clause keyword.
    seen_orders = 1
    nesting = 0
    offset = 0  # index into ``tokens`` of the token currently inspected
    for candidate in tokens[1:]:
        offset += 1

        if candidate.kind == Token.BRACKET_LEFT:
            nesting += 1
        if candidate.kind == Token.BRACKET_RIGHT:
            nesting -= 1

        at_top_level = nesting == 0
        if at_top_level and candidate == order_keyword:
            # Another top-level ORDER disables the stopper check from here on.
            seen_orders += 1

        if not (at_top_level and seen_orders == 1 and candidate in stop_tokens):
            continue

        remainder = tokens[offset:]
        if len(tokens) >= 3 and tokens[2].kind == Token.BRACKET_LEFT:
            # Keep a bracket that directly follows the prefix in the prefix.
            return tokens[0:3], [tokens[3:offset]], remainder
        return tokens[0:2], [tokens[2:offset]], remainder

    # No stopper at top level: the clause runs to the end of the input.
    return tokens[0:2], [tokens[2:]], []
285+
286+ @classmethod
287+ def _split_limit (cls , tokens : List [Token ]) -> Tuple [List [Token ], List [List [Token ]], List [Token ]]:
288+ """Splits ORDER BY sequence """
289+ return tokens [0 :1 ], [tokens [1 :]], []
290+
223291 @classmethod
224292 def _split_create (cls , tokens : List [Token ]) -> Tuple [List [Token ], List [List [Token ]], List [Token ]]:
225293 stoppers = [
@@ -343,14 +411,7 @@ def _split_with(cls, tokens: List[Token]) -> Tuple[List[Token], List[List[Token]
343411
344412 @classmethod
345413 def _split_from (cls , tokens : List [Token ]) -> Tuple [List [Token ], List [List [Token ]], List [Token ]]:
346- """Splits FROM sequence
347-
348- Args:
349- tokens:
350-
351- Returns:
352-
353- """
414+ """Splits FROM sequence """
354415
355416 from_token = Token (word = 'FROM' , kind = Token .KEYWORD )
356417 stoppers = [
@@ -359,6 +420,7 @@ def _split_from(cls, tokens: List[Token]) -> Tuple[List[Token], List[List[Token]
359420 Token (word = 'ORDER' , kind = Token .KEYWORD ),
360421 Token (word = 'GROUP' , kind = Token .KEYWORD ),
361422 Token (word = 'HAVING' , kind = Token .KEYWORD ),
423+ Token (word = 'LIMIT' , kind = Token .KEYWORD ),
362424 ]
363425
364426 # Explores tokens until FROM is closed by condition sequence corresponding it.
@@ -375,46 +437,10 @@ def _split_from(cls, tokens: List[Token]) -> Tuple[List[Token], List[List[Token]
375437 if tokens [1 ].kind == Token .BRACKET_LEFT :
376438 return tokens [0 :2 ], [tokens [2 :idx + 1 ]], tokens [idx + 1 :]
377439 else :
378- return tokens [0 :1 ], [tokens [1 :idx + 1 ]], tokens [idx + 1 :]
440+ return tokens [0 :1 ], [tokens [1 :idx + 1 ]], tokens [idx + 1 :]
379441
380442 return tokens [0 :1 ], [tokens [1 :]], []
381443
382- @classmethod
383- def _split_split (cls , tokens : List [Token ]) -> Tuple [List [Token ], List [List [Token ]], List [Token ]]:
384- """Splits GROIP BY sequence
385-
386- Args:
387- tokens:
388-
389- Returns:
390-
391- """
392- return tokens [0 :2 ], [tokens [2 :]], []
393-
394- @classmethod
395- def _split_order (cls , tokens : List [Token ]) -> Tuple [List [Token ], List [List [Token ]], List [Token ]]:
396- """Splits ORDER BY sequence
397-
398- Args:
399- tokens:
400-
401- Returns:
402-
403- """
404- return tokens [0 :2 ], [tokens [2 :]], []
405-
406- @classmethod
407- def _split_having (cls , tokens : List [Token ]) -> Tuple [List [Token ], List [List [Token ]], List [Token ]]:
408- """Splits ORDER BY sequence
409-
410- Args:
411- tokens:
412-
413- Returns:
414-
415- """
416- return tokens [0 :1 ], [tokens [1 :]], []
417-
418444 @classmethod
419445 def _split_case (cls , tokens : List [Token ]) -> Tuple [List [Token ], List [List [Token ]], List [Token ]]:
420446 """Splits WHEN sequence
@@ -530,6 +556,7 @@ def split_select(cls, tokens: List[Token]) -> Tuple[List[Token], List[List[Token
530556 Token (word = 'ORDER' , kind = Token .KEYWORD ),
531557 Token (word = 'GROUP' , kind = Token .KEYWORD ),
532558 Token (word = 'HAVING' , kind = Token .KEYWORD ),
559+ Token (word = 'LIMIT' , kind = Token .KEYWORD ),
533560 ]
534561
535562 # Explores tokens until SELECT is closed by FROM corresponding it.
@@ -568,6 +595,7 @@ def split_where(cls, tokens: List[Token]) -> Tuple[List[Token], List[List[Token]
568595 Token (word = 'ORDER' , kind = Token .KEYWORD ),
569596 Token (word = 'GROUP' , kind = Token .KEYWORD ),
570597 Token (word = 'HAVING' , kind = Token .KEYWORD ),
598+ Token (word = 'LIMIT' , kind = Token .KEYWORD ),
571599 ]
572600
573601 # Explores tokens until FROM is closed by condition sequence corresponding it.
@@ -643,6 +671,39 @@ def split_condiction(cls, tokens: List[Token]) -> List[List[Token]]:
643671 return result
644672
645673
class KeywordHavingSplitter(KeywordSplitter):
    """Splitter for the HAVING clause."""

    @classmethod
    def split_having(cls, tokens: List[Token]) -> Tuple[List[Token], List[List[Token]], List[Token]]:
        """Splits a HAVING sequence into its keyword, its conditions and the remainder.

        Args:
            tokens: token list; the first item is taken as the clause keyword.

        Returns:
            A 3-tuple ``(keyword, conditions, rest)``:
              * ``keyword`` is ``tokens[0:1]``;
              * ``conditions`` is whatever
                ``KeywordWhereSplitter.split_condiction`` returns for the
                tokens between the keyword and the stopper;
              * ``rest`` starts at the stopper keyword, or is empty when no
                stopper was found.
        """
        keyword_kind = Token.KEYWORD
        opener = Token(word='HAVING', kind=keyword_kind)
        terminators = [
            Token(word=name, kind=keyword_kind)
            for name in ('SELECT', 'WHERE', 'ORDER', 'GROUP', 'LIMIT')
        ]

        # Explores tokens until HAVING is closed by a following top-level clause keyword.
        open_havings = 1
        depth = 0
        for pos, current in enumerate(tokens[1:], start=1):
            if current.kind == Token.BRACKET_LEFT:
                depth += 1
            if current.kind == Token.BRACKET_RIGHT:
                depth -= 1

            if depth != 0:
                continue

            # A further top-level HAVING disables the stopper check below.
            if current == opener:
                open_havings += 1

            if open_havings == 1 and current in terminators:
                head = tokens[0:1]
                conditions = KeywordWhereSplitter.split_condiction(tokens[1:pos])
                return head, conditions, tokens[pos:]

        # No stopper found: the whole tail is the condition list.
        head = tokens[0:1]
        conditions = KeywordWhereSplitter.split_condiction(tokens[1:])
        return head, conditions, []
705+
706+
646707class KeywordJoinSplitter (KeywordSplitter ):
647708 @classmethod
648709 def split_join (cls , tokens : List [Token ]) -> Tuple [List [Token ], List [List [Token ]], List [Token ]]:
0 commit comments