Skip to content

Commit c4e9dae

Browse files
John Bodley and andialbrecht
authored and committed
[tokenizer] Grouping GROUP/ORDER BY
1 parent 8fd25a9 commit c4e9dae

6 files changed

Lines changed: 30 additions & 9 deletions

File tree

sqlparse/filters/aligned_indent.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,12 @@ class AlignedIndentFilter(object):
1515
join_words = (r'((LEFT\s+|RIGHT\s+|FULL\s+)?'
1616
r'(INNER\s+|OUTER\s+|STRAIGHT\s+)?|'
1717
r'(CROSS\s+|NATURAL\s+)?)?JOIN\b')
18+
by_words = ('GROUP BY', 'ORDER BY')
1819
split_words = ('FROM',
1920
join_words, 'ON',
2021
'WHERE', 'AND', 'OR',
21-
'GROUP', 'HAVING', 'LIMIT',
22-
'ORDER', 'UNION', 'VALUES',
22+
'GROUP BY', 'HAVING', 'LIMIT',
23+
'ORDER BY', 'UNION', 'VALUES',
2324
'SET', 'BETWEEN', 'EXCEPT')
2425

2526
def __init__(self, char=' ', n='\n'):
@@ -101,8 +102,12 @@ def _next_token(self, tlist, idx=-1):
101102
def _split_kwds(self, tlist):
102103
tidx, token = self._next_token(tlist)
103104
while token:
104-
# joins are special case. only consider the first word as aligner
105-
if token.match(T.Keyword, self.join_words, regex=True):
105+
# joins, group/order by are special case. only consider the first
106+
# word as aligner
107+
if (
108+
token.match(T.Keyword, self.join_words, regex=True) or
109+
token.match(T.Keyword, ('GROUP BY', 'ORDER BY'))
110+
):
106111
token_indent = token.value.split()[0]
107112
else:
108113
token_indent = text_type(token)
@@ -117,7 +122,9 @@ def _process_default(self, tlist):
117122
idx = tlist.token_index(sgroup)
118123
pidx, prev_ = tlist.token_prev(idx)
119124
# HACK: make "group/order by" work. Longer than max_len.
120-
offset_ = 3 if (prev_ and prev_.match(T.Keyword, 'BY')) else 0
125+
offset_ = 3 if (
126+
prev_ and prev_.match(T.Keyword, ('GROUP BY', 'ORDER BY'))
127+
) else 0
121128
with offset(self, offset_):
122129
self._process(sgroup)
123130

sqlparse/filters/reindent.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def nl(self, offset=0):
5454

5555
def _next_token(self, tlist, idx=-1):
5656
split_words = ('FROM', 'STRAIGHT_JOIN$', 'JOIN$', 'AND', 'OR',
57-
'GROUP', 'ORDER', 'UNION', 'VALUES',
57+
'GROUP BY', 'ORDER BY', 'UNION', 'VALUES',
5858
'SET', 'BETWEEN', 'EXCEPT', 'HAVING', 'LIMIT')
5959
m_split = T.Keyword, split_words, True
6060
tidx, token = tlist.token_next_by(m=m_split, idx=idx)

sqlparse/keywords.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ def is_keyword(value):
7878
(r'UNION\s+ALL\b', tokens.Keyword),
7979
(r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL),
8080
(r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin),
81+
(r'GROUP\s+BY\b', tokens.Keyword),
82+
(r'ORDER\s+BY\b', tokens.Keyword),
8183

8284
(r'[0-9_A-ZÀ-Ü][_$#\w]*', is_keyword),
8385

sqlparse/sql.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -536,14 +536,14 @@ class Where(TokenList):
536536
"""A WHERE clause."""
537537
M_OPEN = T.Keyword, 'WHERE'
538538
M_CLOSE = T.Keyword, (
539-
'ORDER', 'GROUP', 'LIMIT', 'UNION', 'UNION ALL', 'EXCEPT',
539+
'ORDER BY', 'GROUP BY', 'LIMIT', 'UNION', 'UNION ALL', 'EXCEPT',
540540
'HAVING', 'RETURNING', 'INTO')
541541

542542

543543
class Having(TokenList):
544544
"""A HAVING clause."""
545545
M_OPEN = T.Keyword, 'HAVING'
546-
M_CLOSE = T.Keyword, ('ORDER', 'LIMIT')
546+
M_CLOSE = T.Keyword, ('ORDER BY', 'LIMIT')
547547

548548

549549
class Case(TokenList):

tests/test_grouping.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ def test_grouping_where():
211211
s = 'select * from foo where bar = 1 order by id desc'
212212
p = sqlparse.parse(s)[0]
213213
assert str(p) == s
214-
assert len(p.tokens) == 14
214+
assert len(p.tokens) == 12
215215

216216
s = 'select x from (select y from foo where bar = 1) z'
217217
p = sqlparse.parse(s)[0]

tests/test_tokenize.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,3 +183,15 @@ def test_parse_identifiers(s):
183183
token = p.tokens[0]
184184
assert str(token) == s
185185
assert isinstance(token, sql.Identifier)
186+
187+
188+
def test_parse_group_by():
189+
p = sqlparse.parse('GROUP BY')[0]
190+
assert len(p.tokens) == 1
191+
assert p.tokens[0].ttype is T.Keyword
192+
193+
194+
def test_parse_order_by():
195+
p = sqlparse.parse('ORDER BY')[0]
196+
assert len(p.tokens) == 1
197+
assert p.tokens[0].ttype is T.Keyword

0 commit comments

Comments (0)