|
5 | 5 | # This module is part of python-sqlparse and is released under |
6 | 6 | # the BSD License: https://opensource.org/licenses/BSD-3-Clause |
7 | 7 |
|
8 | | -import re |
9 | | - |
10 | 8 | from sqlparse import tokens |
11 | 9 |
|
12 | 10 | # object() only supports "is" and is useful as a marker |
| 11 | +# use this marker to specify that the given regex in SQL_REGEX |
| 12 | +# shall be processed further through a lookup in the KEYWORDS dictionaries |
13 | 13 | PROCESS_AS_KEYWORD = object() |
14 | 14 |
|
15 | 15 |
|
16 | | -SQL_REGEX = { |
17 | | - 'root': [ |
18 | | - (r'(--|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint), |
19 | | - (r'/\*\+[\s\S]*?\*/', tokens.Comment.Multiline.Hint), |
20 | | - |
21 | | - (r'(--|# ).*?(\r\n|\r|\n|$)', tokens.Comment.Single), |
22 | | - (r'/\*[\s\S]*?\*/', tokens.Comment.Multiline), |
23 | | - |
24 | | - (r'(\r\n|\r|\n)', tokens.Newline), |
25 | | - (r'\s+?', tokens.Whitespace), |
26 | | - |
27 | | - (r':=', tokens.Assignment), |
28 | | - (r'::', tokens.Punctuation), |
29 | | - |
30 | | - (r'\*', tokens.Wildcard), |
31 | | - |
32 | | - (r"`(``|[^`])*`", tokens.Name), |
33 | | - (r"´(´´|[^´])*´", tokens.Name), |
34 | | - (r'((?<!\S)\$(?:[_A-ZÀ-Ü]\w*)?\$)[\s\S]*?\1', tokens.Literal), |
35 | | - |
36 | | - (r'\?', tokens.Name.Placeholder), |
37 | | - (r'%(\(\w+\))?s', tokens.Name.Placeholder), |
38 | | - (r'(?<!\w)[$:?]\w+', tokens.Name.Placeholder), |
39 | | - |
40 | | - (r'\\\w+', tokens.Command), |
41 | | - |
42 | | - # FIXME(andi): VALUES shouldn't be listed here |
43 | | - # see https://github.com/andialbrecht/sqlparse/pull/64 |
44 | | - # AS and IN are special, it may be followed by a parenthesis, but |
45 | | - # are never functions, see issue183 and issue507 |
46 | | - (r'(CASE|IN|VALUES|USING|FROM|AS)\b', tokens.Keyword), |
47 | | - |
48 | | - (r'(@|##|#)[A-ZÀ-Ü]\w+', tokens.Name), |
49 | | - |
50 | | - # see issue #39 |
51 | | - # Spaces around period `schema . name` are valid identifier |
52 | | - # TODO: Spaces before period not implemented |
53 | | - (r'[A-ZÀ-Ü]\w*(?=\s*\.)', tokens.Name), # 'Name'. |
54 | | - # FIXME(atronah): never match, |
55 | | - # because `re.match` doesn't work with look-behind regexp feature |
56 | | - (r'(?<=\.)[A-ZÀ-Ü]\w*', tokens.Name), # .'Name' |
57 | | - (r'[A-ZÀ-Ü]\w*(?=\()', tokens.Name), # side effect: change kw to func |
58 | | - (r'-?0x[\dA-F]+', tokens.Number.Hexadecimal), |
59 | | - (r'-?\d+(\.\d+)?E-?\d+', tokens.Number.Float), |
60 | | - (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])', |
61 | | - tokens.Number.Float), |
62 | | - (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer), |
63 | | - (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single), |
64 | | - # not a real string literal in ANSI SQL: |
65 | | - (r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol), |
66 | | - (r'(""|".*?[^\\]")', tokens.String.Symbol), |
67 | | - # sqlite names can be escaped with [square brackets]. left bracket |
68 | | - # cannot be preceded by word character or a right bracket -- |
69 | | - # otherwise it's probably an array index |
70 | | - (r'(?<![\w\])])(\[[^\]\[]+\])', tokens.Name), |
71 | | - (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?' |
72 | | - r'|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword), |
73 | | - (r'END(\s+IF|\s+LOOP|\s+WHILE)?\b', tokens.Keyword), |
74 | | - (r'NOT\s+NULL\b', tokens.Keyword), |
75 | | - (r'NULLS\s+(FIRST|LAST)\b', tokens.Keyword), |
76 | | - (r'UNION\s+ALL\b', tokens.Keyword), |
77 | | - (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL), |
78 | | - (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin), |
79 | | - (r'GROUP\s+BY\b', tokens.Keyword), |
80 | | - (r'ORDER\s+BY\b', tokens.Keyword), |
81 | | - (r'HANDLER\s+FOR\b', tokens.Keyword), |
82 | | - (r'(LATERAL\s+VIEW\s+)' |
83 | | - r'(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\b', |
84 | | - tokens.Keyword), |
85 | | - (r"(AT|WITH')\s+TIME\s+ZONE\s+'[^']+'", tokens.Keyword.TZCast), |
86 | | - (r'(NOT\s+)?(LIKE|ILIKE|RLIKE)\b', tokens.Operator.Comparison), |
87 | | - (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison), |
88 | | - # Check for keywords, also returns tokens.Name if regex matches |
89 | | - # but the match isn't a keyword. |
90 | | - (r'[0-9_\w][_$#\w]*', PROCESS_AS_KEYWORD), |
91 | | - (r'[;:()\[\],\.]', tokens.Punctuation), |
92 | | - (r'[<>=~!]+', tokens.Operator.Comparison), |
93 | | - (r'[+/@#%^&|^-]+', tokens.Operator), |
94 | | - ]} |
95 | | - |
96 | | -FLAGS = re.IGNORECASE | re.UNICODE |
97 | | -SQL_REGEX = [(re.compile(rx, FLAGS).match, tt) for rx, tt in SQL_REGEX['root']] |
| 16 | +SQL_REGEX = [ |
| 17 | + (r'(--|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint), |
| 18 | + (r'/\*\+[\s\S]*?\*/', tokens.Comment.Multiline.Hint), |
| 19 | + |
| 20 | + (r'(--|# ).*?(\r\n|\r|\n|$)', tokens.Comment.Single), |
| 21 | + (r'/\*[\s\S]*?\*/', tokens.Comment.Multiline), |
| 22 | + |
| 23 | + (r'(\r\n|\r|\n)', tokens.Newline), |
| 24 | + (r'\s+?', tokens.Whitespace), |
| 25 | + |
| 26 | + (r':=', tokens.Assignment), |
| 27 | + (r'::', tokens.Punctuation), |
| 28 | + |
| 29 | + (r'\*', tokens.Wildcard), |
| 30 | + |
| 31 | + (r"`(``|[^`])*`", tokens.Name), |
| 32 | + (r"´(´´|[^´])*´", tokens.Name), |
| 33 | + (r'((?<!\S)\$(?:[_A-ZÀ-Ü]\w*)?\$)[\s\S]*?\1', tokens.Literal), |
| 34 | + |
| 35 | + (r'\?', tokens.Name.Placeholder), |
| 36 | + (r'%(\(\w+\))?s', tokens.Name.Placeholder), |
| 37 | + (r'(?<!\w)[$:?]\w+', tokens.Name.Placeholder), |
| 38 | + |
| 39 | + (r'\\\w+', tokens.Command), |
| 40 | + |
| 41 | + # FIXME(andi): VALUES shouldn't be listed here |
| 42 | + # see https://github.com/andialbrecht/sqlparse/pull/64 |
| 43 | + # AS and IN are special: they may be followed by a parenthesis, but |
| 44 | + # are never functions, see issue183 and issue507 |
| 45 | + (r'(CASE|IN|VALUES|USING|FROM|AS)\b', tokens.Keyword), |
| 46 | + |
| 47 | + (r'(@|##|#)[A-ZÀ-Ü]\w+', tokens.Name), |
| 48 | + |
| 49 | + # see issue #39 |
| 50 | + # Spaces around the period (`schema . name`) form a valid identifier |
| 51 | + # TODO: Spaces before the period are not implemented |
| 52 | + (r'[A-ZÀ-Ü]\w*(?=\s*\.)', tokens.Name), # 'Name'. |
| 53 | + # FIXME(atronah): never matches, |
| 54 | + # because `re.match` doesn't work with the look-behind regexp feature |
| 55 | + (r'(?<=\.)[A-ZÀ-Ü]\w*', tokens.Name), # .'Name' |
| 56 | + (r'[A-ZÀ-Ü]\w*(?=\()', tokens.Name), # side effect: change kw to func |
| 57 | + (r'-?0x[\dA-F]+', tokens.Number.Hexadecimal), |
| 58 | + (r'-?\d+(\.\d+)?E-?\d+', tokens.Number.Float), |
| 59 | + (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])', |
| 60 | + tokens.Number.Float), |
| 61 | + (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer), |
| 62 | + (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single), |
| 63 | + # not a real string literal in ANSI SQL: |
| 64 | + (r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol), |
| 65 | + (r'(""|".*?[^\\]")', tokens.String.Symbol), |
| 66 | + # sqlite names can be escaped with [square brackets]. left bracket |
| 67 | + # cannot be preceded by word character or a right bracket -- |
| 68 | + # otherwise it's probably an array index |
| 69 | + (r'(?<![\w\])])(\[[^\]\[]+\])', tokens.Name), |
| 70 | + (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?' |
| 71 | + r'|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword), |
| 72 | + (r'END(\s+IF|\s+LOOP|\s+WHILE)?\b', tokens.Keyword), |
| 73 | + (r'NOT\s+NULL\b', tokens.Keyword), |
| 74 | + (r'NULLS\s+(FIRST|LAST)\b', tokens.Keyword), |
| 75 | + (r'UNION\s+ALL\b', tokens.Keyword), |
| 76 | + (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL), |
| 77 | + (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin), |
| 78 | + (r'GROUP\s+BY\b', tokens.Keyword), |
| 79 | + (r'ORDER\s+BY\b', tokens.Keyword), |
| 80 | + (r'HANDLER\s+FOR\b', tokens.Keyword), |
| 81 | + (r'(LATERAL\s+VIEW\s+)' |
| 82 | + r'(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\b', |
| 83 | + tokens.Keyword), |
| 84 | + (r"(AT|WITH')\s+TIME\s+ZONE\s+'[^']+'", tokens.Keyword.TZCast), |
| 85 | + (r'(NOT\s+)?(LIKE|ILIKE|RLIKE)\b', tokens.Operator.Comparison), |
| 86 | + (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison), |
| 87 | + # Check for keywords, also returns tokens.Name if regex matches |
| 88 | + # but the match isn't a keyword. |
| 89 | + (r'[0-9_\w][_$#\w]*', PROCESS_AS_KEYWORD), |
| 90 | + (r'[;:()\[\],\.]', tokens.Punctuation), |
| 91 | + (r'[<>=~!]+', tokens.Operator.Comparison), |
| 92 | + (r'[+/@#%^&|^-]+', tokens.Operator), |
| 93 | +] |
98 | 94 |
|
99 | 95 | KEYWORDS = { |
100 | 96 | 'ABORT': tokens.Keyword, |
|
0 commit comments