Skip to content

Commit d4cc064

Browse files
sjoerdjob
authored and vmuriart committed
Replace _group_matching with an inward-out grouping algorithm
Previously, the matching between open and close tokens was repeated on every pass: the algorithm first found the matching closing token, then grouped the tokens in between, and finally recursed over the newly created list. It is more efficient to look backwards for the most recent open token when a closing token is found, group those two together, and then continue on. squashed: Handle token indices in group_tokens_between and find_matching.
1 parent 896774c commit d4cc064

File tree

3 files changed

+30
-13
lines changed

3 files changed

+30
-13
lines changed

sqlparse/engine/grouping.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from sqlparse import sql
44
from sqlparse import tokens as T
5-
from sqlparse.utils import recurse, imt, find_matching
5+
from sqlparse.utils import recurse, imt
66

77
M_ROLE = (T.Keyword, ('null', 'role'))
88
M_SEMICOLON = (T.Punctuation, ';')
@@ -39,13 +39,25 @@ def _group_matching(tlist, cls):
3939
"""Groups Tokens that have beginning and end. ie. parenthesis, brackets.."""
4040
idx = 1 if imt(tlist, i=cls) else 0
4141

42-
token = tlist.token_next_by(m=cls.M_OPEN, idx=idx)
43-
while token:
44-
end = find_matching(tlist, token, cls.M_OPEN, cls.M_CLOSE)
45-
if end is not None:
46-
token = tlist.group_tokens_between(cls, token, end)
47-
_group_matching(token, cls)
48-
token = tlist.token_next_by(m=cls.M_OPEN, idx=tlist.token_index(token) + 1)
42+
opens = []
43+
44+
while True:
45+
try:
46+
token = tlist.tokens[idx]
47+
except IndexError:
48+
break
49+
50+
if token.match(*cls.M_OPEN):
51+
opens.append(idx)
52+
elif token.match(*cls.M_CLOSE):
53+
try:
54+
open_idx = opens.pop()
55+
except IndexError:
56+
break
57+
tlist.group_tokens_between(cls, open_idx, idx)
58+
idx = open_idx
59+
60+
idx += 1
4961

5062

5163
def group_if(tlist):

sqlparse/sql.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -331,9 +331,14 @@ def tokens_between(self, start, end, include_end=True):
331331

332332
def group_tokens_between(self, grp_cls, start, end, include_end=True, extend=False):
333333
"""Replace tokens by an instance of *grp_cls*."""
334-
start_idx = self.token_index(start)
335-
end_idx = self.token_index(end) + include_end
336-
tokens = self.tokens[start_idx:end_idx]
334+
if isinstance(start, int):
335+
start_idx = start
336+
start = self.tokens[start_idx]
337+
else:
338+
start_idx = self.token_index(start)
339+
340+
end_idx = self.token_index(end) if not isinstance(end, int) else end
341+
end_idx += include_end
337342

338343
if extend and isinstance(start, grp_cls):
339344
subtokens = self.tokens[start_idx+1:end_idx]
@@ -344,7 +349,7 @@ def group_tokens_between(self, grp_cls, start, end, include_end=True, extend=Fal
344349
grp.value = start.__str__()
345350
else:
346351
subtokens = self.tokens[start_idx:end_idx]
347-
grp = grp_cls(tokens)
352+
grp = grp_cls(subtokens)
348353
self.tokens[start_idx:end_idx] = [grp]
349354
grp.parent = self
350355

sqlparse/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ def imt(token, i=None, m=None, t=None):
164164

165165

166166
def find_matching(tlist, token, M1, M2):
167-
idx = tlist.token_index(token)
167+
idx = tlist.token_index(token) if not isinstance(token, int) else token
168168
depth = 0
169169
for token in tlist.tokens[idx:]:
170170
if token.match(*M1):

0 commit comments

Comments (0)