Skip to content

Commit 4364b25

Browse files
Committed by the author
Add group matching M_tokens and refactor group matching
remove slots in subclasses
1 parent f26719d commit 4364b25

3 files changed

Lines changed: 38 additions & 110 deletions

File tree

sqlparse/engine/grouping.py

Lines changed: 22 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from sqlparse import sql
66
from sqlparse import tokens as T
7-
from sqlparse.utils import recurse
7+
from sqlparse.utils import recurse, imt, find_matching
88

99

1010
def _group_left_right(tlist, ttype, value, cls,
@@ -47,68 +47,36 @@ def _group_left_right(tlist, ttype, value, cls,
4747
ttype, value)
4848

4949

50-
def _find_matching(idx, tlist, start_ttype, start_value, end_ttype, end_value):
51-
depth = 1
52-
for tok in tlist.tokens[idx:]:
53-
if tok.match(start_ttype, start_value):
54-
depth += 1
55-
elif tok.match(end_ttype, end_value):
56-
depth -= 1
57-
if depth == 1:
58-
return tok
59-
return None
60-
61-
62-
def _group_matching(tlist, start_ttype, start_value, end_ttype, end_value,
63-
cls, include_semicolon=False, recurse=False):
64-
65-
[_group_matching(sgroup, start_ttype, start_value, end_ttype, end_value,
66-
cls, include_semicolon) for sgroup in tlist.get_sublists()
67-
if recurse]
68-
if isinstance(tlist, cls):
69-
idx = 1
70-
else:
71-
idx = 0
72-
token = tlist.token_next_match(idx, start_ttype, start_value)
50+
def _group_matching(tlist, cls):
51+
"""Groups Tokens that have beginning and end. ie. parenthesis, brackets.."""
52+
idx = 1 if imt(tlist, i=cls) else 0
53+
54+
token = tlist.token_next_by(m=cls.M_OPEN, idx=idx)
7355
while token:
74-
tidx = tlist.token_index(token)
75-
end = _find_matching(tidx, tlist, start_ttype, start_value,
76-
end_ttype, end_value)
77-
if end is None:
78-
idx = tidx + 1
79-
else:
80-
if include_semicolon:
81-
next_ = tlist.token_next(tlist.token_index(end))
82-
if next_ and next_.match(T.Punctuation, ';'):
83-
end = next_
84-
group = tlist.group_tokens(cls, tlist.tokens_between(token, end))
85-
_group_matching(group, start_ttype, start_value,
86-
end_ttype, end_value, cls, include_semicolon)
87-
idx = tlist.token_index(group) + 1
88-
token = tlist.token_next_match(idx, start_ttype, start_value)
56+
end = find_matching(tlist, token, cls.M_OPEN, cls.M_CLOSE)
57+
if end is not None:
58+
token = tlist.group_tokens(cls, tlist.tokens_between(token, end))
59+
_group_matching(token, cls)
60+
token = tlist.token_next_by(m=cls.M_OPEN, idx=token)
8961

9062

9163
def group_if(tlist):
92-
_group_matching(tlist, T.Keyword, 'IF', T.Keyword, 'END IF', sql.If, True)
64+
_group_matching(tlist, sql.If)
9365

9466

9567
def group_for(tlist):
96-
_group_matching(tlist, T.Keyword, 'FOR', T.Keyword, 'END LOOP',
97-
sql.For, True)
68+
_group_matching(tlist, sql.For)
9869

9970

10071
def group_foreach(tlist):
101-
_group_matching(tlist, T.Keyword, 'FOREACH', T.Keyword, 'END LOOP',
102-
sql.For, True)
72+
_group_matching(tlist, sql.For)
10373

10474

10575
def group_begin(tlist):
106-
_group_matching(tlist, T.Keyword, 'BEGIN', T.Keyword, 'END',
107-
sql.Begin, True)
76+
_group_matching(tlist, sql.Begin)
10877

10978

11079
def group_as(tlist):
111-
11280
def _right_valid(token):
11381
# Currently limited to DML/DDL. Maybe additional more non SQL reserved
11482
# keywords should appear here (see issue8).
@@ -130,7 +98,6 @@ def group_assignment(tlist):
13098

13199

132100
def group_comparison(tlist):
133-
134101
def _parts_valid(token):
135102
return (token.ttype in (T.String.Symbol, T.String.Single,
136103
T.Name, T.Number, T.Number.Float,
@@ -140,13 +107,13 @@ def _parts_valid(token):
140107
sql.Function))
141108
or (token.ttype is T.Keyword
142109
and token.value.upper() in ['NULL', ]))
110+
143111
_group_left_right(tlist, T.Operator.Comparison, None, sql.Comparison,
144112
check_left=_parts_valid, check_right=_parts_valid)
145113

146114

147115
def group_case(tlist):
148-
_group_matching(tlist, T.Keyword, 'CASE', T.Keyword, 'END', sql.Case,
149-
include_semicolon=True, recurse=True)
116+
_group_matching(tlist, sql.Case)
150117

151118

152119
def group_identifier(tlist):
@@ -222,7 +189,7 @@ def _next_token(tl, i):
222189
and (isinstance(identifier_tokens[0], (sql.Function,
223190
sql.Parenthesis))
224191
or identifier_tokens[0].ttype in (
225-
T.Literal.Number.Integer, T.Literal.Number.Float))):
192+
T.Literal.Number.Integer, T.Literal.Number.Float))):
226193
group = tlist.group_tokens(sql.Identifier, identifier_tokens)
227194
idx = tlist.token_index(group, start=idx) + 1
228195
else:
@@ -284,47 +251,11 @@ def group_identifier_list(tlist):
284251

285252

286253
def group_brackets(tlist):
287-
"""Group parentheses () or square brackets []
288-
289-
This is just like _group_matching, but complicated by the fact that
290-
round brackets can contain square bracket groups and vice versa
291-
"""
292-
293-
if isinstance(tlist, (sql.Parenthesis, sql.SquareBrackets)):
294-
idx = 1
295-
else:
296-
idx = 0
297-
298-
# Find the first opening bracket
299-
token = tlist.token_next_match(idx, T.Punctuation, ['(', '['])
300-
301-
while token:
302-
start_val = token.value # either '(' or '['
303-
if start_val == '(':
304-
end_val = ')'
305-
group_class = sql.Parenthesis
306-
else:
307-
end_val = ']'
308-
group_class = sql.SquareBrackets
309-
310-
tidx = tlist.token_index(token)
311-
312-
# Find the corresponding closing bracket
313-
end = _find_matching(tidx, tlist, T.Punctuation, start_val,
314-
T.Punctuation, end_val)
315-
316-
if end is None:
317-
idx = tidx + 1
318-
else:
319-
group = tlist.group_tokens(group_class,
320-
tlist.tokens_between(token, end))
254+
_group_matching(tlist, sql.SquareBrackets)
321255

322-
# Check for nested bracket groups within this group
323-
group_brackets(group)
324-
idx = tlist.token_index(group) + 1
325256

326-
# Find the next opening bracket
327-
token = tlist.token_next_match(idx, T.Punctuation, ['(', '['])
257+
def group_parenthesis(tlist):
258+
_group_matching(tlist, sql.Parenthesis)
328259

329260

330261
@recurse(sql.Comment)
@@ -431,6 +362,7 @@ def group(tlist):
431362
for func in [
432363
group_comments,
433364
group_brackets,
365+
group_parenthesis,
434366
group_functions,
435367
group_where,
436368
group_case,

sqlparse/sql.py

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -552,8 +552,6 @@ class Identifier(TokenList):
552552
Identifiers may have aliases or typecasts.
553553
"""
554554

555-
__slots__ = ('value', 'ttype', 'tokens')
556-
557555
def is_wildcard(self):
558556
"""Return ``True`` if this identifier contains a wildcard."""
559557
token = self.token_next_by_type(0, T.Wildcard)
@@ -588,8 +586,6 @@ def get_array_indices(self):
588586
class IdentifierList(TokenList):
589587
"""A list of :class:`~sqlparse.sql.Identifier`\'s."""
590588

591-
__slots__ = ('value', 'ttype', 'tokens')
592-
593589
def get_identifiers(self):
594590
"""Returns the identifiers.
595591
@@ -602,7 +598,8 @@ def get_identifiers(self):
602598

603599
class Parenthesis(TokenList):
604600
"""Tokens between parenthesis."""
605-
__slots__ = ('value', 'ttype', 'tokens')
601+
M_OPEN = (T.Punctuation, '(')
602+
M_CLOSE = (T.Punctuation, ')')
606603

607604
@property
608605
def _groupable_tokens(self):
@@ -611,8 +608,8 @@ def _groupable_tokens(self):
611608

612609
class SquareBrackets(TokenList):
613610
"""Tokens between square brackets"""
614-
615-
__slots__ = ('value', 'ttype', 'tokens')
611+
M_OPEN = (T.Punctuation, '[')
612+
M_CLOSE = (T.Punctuation, ']')
616613

617614
@property
618615
def _groupable_tokens(self):
@@ -621,22 +618,22 @@ def _groupable_tokens(self):
621618

622619
class Assignment(TokenList):
623620
"""An assignment like 'var := val;'"""
624-
__slots__ = ('value', 'ttype', 'tokens')
625621

626622

627623
class If(TokenList):
628624
"""An 'if' clause with possible 'else if' or 'else' parts."""
629-
__slots__ = ('value', 'ttype', 'tokens')
625+
M_OPEN = (T.Keyword, 'IF')
626+
M_CLOSE = (T.Keyword, 'END IF')
630627

631628

632629
class For(TokenList):
633630
"""A 'FOR' loop."""
634-
__slots__ = ('value', 'ttype', 'tokens')
631+
M_OPEN = (T.Keyword, ('FOR', 'FOREACH'))
632+
M_CLOSE = (T.Keyword, 'END LOOP')
635633

636634

637635
class Comparison(TokenList):
638636
"""A comparison used for example in WHERE clauses."""
639-
__slots__ = ('value', 'ttype', 'tokens')
640637

641638
@property
642639
def left(self):
@@ -649,21 +646,22 @@ def right(self):
649646

650647
class Comment(TokenList):
651648
"""A comment."""
652-
__slots__ = ('value', 'ttype', 'tokens')
653649

654650
def is_multiline(self):
655651
return self.tokens and self.tokens[0].ttype == T.Comment.Multiline
656652

657653

658654
class Where(TokenList):
659655
"""A WHERE clause."""
660-
__slots__ = ('value', 'ttype', 'tokens')
656+
M_OPEN = (T.Keyword, 'WHERE')
657+
M_CLOSE = (T.Keyword,
658+
('ORDER', 'GROUP', 'LIMIT', 'UNION', 'EXCEPT', 'HAVING'))
661659

662660

663661
class Case(TokenList):
664662
"""A CASE statement with one or more WHEN and possibly an ELSE part."""
665-
666-
__slots__ = ('value', 'ttype', 'tokens')
663+
M_OPEN = (T.Keyword, 'CASE')
664+
M_CLOSE = (T.Keyword, 'END')
667665

668666
def get_cases(self):
669667
"""Returns a list of 2-tuples (condition, value).
@@ -713,8 +711,6 @@ def get_cases(self):
713711
class Function(TokenList):
714712
"""A function or procedure call."""
715713

716-
__slots__ = ('value', 'ttype', 'tokens')
717-
718714
def get_parameters(self):
719715
"""Return a list of parameters."""
720716
parenthesis = self.tokens[-1]
@@ -728,5 +724,5 @@ def get_parameters(self):
728724

729725
class Begin(TokenList):
730726
"""A BEGIN/END block."""
731-
732-
__slots__ = ('value', 'ttype', 'tokens')
727+
M_OPEN = (T.Keyword, 'BEGIN')
728+
M_CLOSE = (T.Keyword, 'END')

sqlparse/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ def imt(token, i=None, m=None, t=None):
166166
def find_matching(tlist, token, M1, M2):
167167
idx = tlist.token_index(token)
168168
depth = 0
169-
for token in tlist[idx:]:
169+
for token in tlist.tokens[idx:]:
170170
if token.match(*M1):
171171
depth += 1
172172
elif token.match(*M2):

0 commit comments

Comments (0)