Skip to content

Commit e6a51a0

Browse files
committed
Use compat module for single Python 2/3 code base.
This change includes minor fixes and code cleanup too.
1 parent 8bfdaf3 commit e6a51a0

File tree

16 files changed

+93
-92
lines changed

16 files changed

+93
-92
lines changed

.travis.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ python:
44
- "3.5"
55
- "3.4"
66
- "3.3"
7-
- "3.2"
87
- "2.7"
98
- "pypy"
109
- "pypy3"

docs/source/intro.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,10 @@ Each object can be converted back to a string at any time:
100100

101101
.. code-block:: python
102102
103-
>>> unicode(stmt) # str(stmt) for Python 3
104-
u'select * from "someschema"."mytable" where id = 1'
105-
>>> unicode(stmt.tokens[-1]) # or just the WHERE part
106-
u'where id = 1'
103+
>>> str(stmt) # unicode(stmt) for Python 2
104+
'select * from "someschema"."mytable" where id = 1'
105+
>>> str(stmt.tokens[-1]) # or just the WHERE part
106+
'where id = 1'
107107
108108
Details of the returned objects are described in :ref:`analyze`.
109109

setup.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@ def get_version():
6767
>>> res
6868
(<Statement 'select...' at 0x9ad08ec>,)
6969
>>> stmt = res[0]
70-
>>> unicode(stmt) # converting it back to unicode
71-
u'select * from someschema.mytable where id = 1'
70+
>>> str(stmt) # converting it back to unicode
71+
'select * from someschema.mytable where id = 1'
7272
>>> # This is what the internal representation looks like:
7373
>>> stmt.tokens
7474
(<DML 'select' at 0x9b63c34>,
@@ -110,7 +110,6 @@ def get_version():
110110
'Programming Language :: Python :: 2',
111111
'Programming Language :: Python :: 2.7',
112112
'Programming Language :: Python :: 3',
113-
'Programming Language :: Python :: 3.2',
114113
'Programming Language :: Python :: 3.3',
115114
'Programming Language :: Python :: 3.4',
116115
'Programming Language :: Python :: 3.5',

sqlparse/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from sqlparse import filters
1515
from sqlparse import formatter
1616

17+
from sqlparse.compat import u
1718
# Deprecated in 0.1.5. Will be removed in 0.2.0
1819
from sqlparse.exceptions import SQLParseError
1920

@@ -67,7 +68,7 @@ def split(sql, encoding=None):
6768
"""
6869
stack = engine.FilterStack()
6970
stack.split_statements = True
70-
return [unicode(stmt).strip() for stmt in stack.run(sql, encoding)]
71+
return [u(stmt).strip() for stmt in stack.run(sql, encoding)]
7172

7273

7374
from sqlparse.engine.filter import StatementFilter

sqlparse/compat.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@
1919
from io import StringIO
2020

2121
def u(s):
22-
return s
22+
return str(s)
2323

2424
elif PY2:
2525
text_type = unicode
2626
string_types = (basestring,)
2727
from StringIO import StringIO # flake8: noqa
2828

2929
def u(s):
30-
return unicode(s, 'unicode_escape')
30+
return unicode(s)
3131

3232

3333
# Directly copied from six:

sqlparse/filters.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from os.path import abspath, join
66

77
from sqlparse import sql, tokens as T
8+
from sqlparse.compat import u, text_type
89
from sqlparse.engine import FilterStack
910
from sqlparse.lexer import tokenize
1011
from sqlparse.pipeline import Pipeline
@@ -25,7 +26,7 @@ def __init__(self, case=None):
2526
if case is None:
2627
case = 'upper'
2728
assert case in ['lower', 'upper', 'capitalize']
28-
self.convert = getattr(unicode, case)
29+
self.convert = getattr(text_type, case)
2930

3031
def process(self, stack, stream):
3132
for ttype, value in stream:
@@ -52,7 +53,7 @@ class TruncateStringFilter:
5253

5354
def __init__(self, width, char):
5455
self.width = max(width, 1)
55-
self.char = unicode(char)
56+
self.char = u(char)
5657

5758
def process(self, stack, stream):
5859
for ttype, value in stream:
@@ -154,7 +155,7 @@ def process(self, stack, stream):
154155
f.close()
155156

156157
# There was a problem loading the include file
157-
except IOError, err:
158+
except IOError as err:
158159
# Raise the exception to the interpreter
159160
if self.raiseexceptions:
160161
raise
@@ -171,7 +172,7 @@ def process(self, stack, stream):
171172
self.raiseexceptions)
172173

173174
# Max recursion limit reached
174-
except ValueError, err:
175+
except ValueError as err:
175176
# Raise the exception to the interpreter
176177
if self.raiseexceptions:
177178
raise
@@ -300,7 +301,7 @@ def _flatten_up_to_token(self, token):
300301
raise StopIteration
301302

302303
def _get_offset(self, token):
303-
raw = ''.join(map(unicode, self._flatten_up_to_token(token)))
304+
raw = ''.join(map(text_type, self._flatten_up_to_token(token)))
304305
line = raw.splitlines()[-1]
305306
# Now take current offset into account and return relative offset.
306307
full_offset = len(line) - len(self.char * (self.width * self.indent))
@@ -340,7 +341,7 @@ def _next_token(i):
340341
if prev and prev.is_whitespace() and prev not in added:
341342
tlist.tokens.pop(tlist.token_index(prev))
342343
offset += 1
343-
uprev = unicode(prev)
344+
uprev = u(prev)
344345
if (prev and (uprev.endswith('\n') or uprev.endswith('\r'))):
345346
nl = tlist.token_next(token)
346347
else:
@@ -462,7 +463,7 @@ def process(self, stack, stmt):
462463
self._process(stmt)
463464
if isinstance(stmt, sql.Statement):
464465
if self._last_stmt is not None:
465-
if unicode(self._last_stmt).endswith('\n'):
466+
if u(self._last_stmt).endswith('\n'):
466467
nl = '\n'
467468
else:
468469
nl = '\n\n'
@@ -494,7 +495,7 @@ def _process(self, stack, group, stream):
494495
and not token.__class__ in self.keep_together):
495496
token.tokens = self._process(stack, token, token.tokens)
496497
else:
497-
val = unicode(token)
498+
val = u(token)
498499
if len(self.line) + len(val) > self.width:
499500
match = re.search('^ +', self.line)
500501
if match is not None:
@@ -568,7 +569,7 @@ def process(self, stack, stream):
568569
class SerializerUnicode:
569570

570571
def process(self, stack, stmt):
571-
raw = unicode(stmt)
572+
raw = u(stmt)
572573
lines = split_unquoted_newlines(raw)
573574
res = '\n'.join(line.rstrip() for line in lines)
574575
return res
@@ -578,7 +579,7 @@ def Tokens2Unicode(stream):
578579
result = ""
579580

580581
for _, value in stream:
581-
result += unicode(value)
582+
result += u(value)
582583

583584
return result
584585

@@ -600,7 +601,7 @@ def process(self, stack, stmt):
600601
else:
601602
varname = self.varname
602603

603-
has_nl = len(unicode(stmt).strip().splitlines()) > 1
604+
has_nl = len(u(stmt).strip().splitlines()) > 1
604605
stmt.tokens = self._process(stmt.tokens, varname, has_nl)
605606
return stmt
606607

sqlparse/lexer.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
from sqlparse import tokens
1919
from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON
20-
from cStringIO import StringIO
20+
from sqlparse.compat import StringIO, string_types, with_metaclass, text_type
2121

2222

2323
class include(str):
@@ -81,14 +81,14 @@ def _process_state(cls, unprocessed, processed, state):
8181

8282
try:
8383
rex = re.compile(tdef[0], rflags).match
84-
except Exception, err:
84+
except Exception as err:
8585
raise ValueError(("uncompilable regex %r in state"
8686
" %r of %r: %s"
8787
% (tdef[0], state, cls, err)))
8888

8989
assert type(tdef[1]) is tokens._TokenType or callable(tdef[1]), \
90-
('token type must be simple type or callable, not %r'
91-
% (tdef[1],))
90+
('token type must be simple type or callable, not %r'
91+
% (tdef[1],))
9292

9393
if len(tdef) == 2:
9494
new_state = None
@@ -113,7 +113,7 @@ def _process_state(cls, unprocessed, processed, state):
113113
itokens = []
114114
for istate in tdef2:
115115
assert istate != state, \
116-
'circular state ref %r' % istate
116+
'circular state ref %r' % istate
117117
itokens.extend(cls._process_state(unprocessed,
118118
processed, istate))
119119
processed[new_state] = itokens
@@ -123,7 +123,7 @@ def _process_state(cls, unprocessed, processed, state):
123123
for state in tdef2:
124124
assert (state in unprocessed or
125125
state in ('#pop', '#push')), \
126-
'unknown new state ' + state
126+
'unknown new state ' + state
127127
new_state = tdef2
128128
else:
129129
assert False, 'unknown new state def %r' % tdef2
@@ -134,7 +134,7 @@ def process_tokendef(cls):
134134
cls._all_tokens = {}
135135
cls._tmpname = 0
136136
processed = cls._all_tokens[cls.__name__] = {}
137-
#tokendefs = tokendefs or cls.tokens[name]
137+
# tokendefs = tokendefs or cls.tokens[name]
138138
for state in cls.tokens.keys():
139139
cls._process_state(cls.tokens, processed, state)
140140
return processed
@@ -152,9 +152,7 @@ def __call__(cls, *args, **kwds):
152152
return type.__call__(cls, *args, **kwds)
153153

154154

155-
class Lexer(object):
156-
157-
__metaclass__ = LexerMeta
155+
class _Lexer(object):
158156

159157
encoding = 'utf-8'
160158
stripall = False
@@ -201,7 +199,8 @@ class Lexer(object):
201199
# cannot be preceded by word character or a right bracket --
202200
# otherwise it's probably an array index
203201
(r'(?<![\w\])])(\[[^\]]+\])', tokens.Name),
204-
(r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword),
202+
(r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?'
203+
r'|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword),
205204
(r'END(\s+IF|\s+LOOP)?\b', tokens.Keyword),
206205
(r'NOT NULL\b', tokens.Keyword),
207206
(r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL),
@@ -258,13 +257,13 @@ def get_tokens(self, text, unfiltered=False):
258257
Also preprocess the text, i.e. expand tabs and strip it if
259258
wanted and applies registered filters.
260259
"""
261-
if isinstance(text, basestring):
260+
if isinstance(text, string_types):
262261
if self.stripall:
263262
text = text.strip()
264263
elif self.stripnl:
265264
text = text.strip('\n')
266265

267-
if sys.version_info[0] < 3 and isinstance(text, unicode):
266+
if sys.version_info[0] < 3 and isinstance(text, text_type):
268267
text = StringIO(text.encode('utf-8'))
269268
self.encoding = 'utf-8'
270269
else:
@@ -350,6 +349,10 @@ def get_tokens_unprocessed(self, stream, stack=('root',)):
350349
break
351350

352351

352+
class Lexer(with_metaclass(LexerMeta, _Lexer)):
353+
pass
354+
355+
353356
def tokenize(sql, encoding=None):
354357
"""Tokenize sql.
355358

sqlparse/sql.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import sys
77

88
from sqlparse import tokens as T
9+
from sqlparse.compat import string_types, u
910

1011

1112
class Token(object):
@@ -32,7 +33,7 @@ def __str__(self):
3233
if sys.version_info[0] == 3:
3334
return self.value
3435
else:
35-
return unicode(self).encode('utf-8')
36+
return u(self).encode('utf-8')
3637

3738
def __repr__(self):
3839
short = self._get_repr_value()
@@ -51,13 +52,13 @@ def to_unicode(self):
5152
.. deprecated:: 0.1.5
5253
Use ``unicode(token)`` (for Python 3: ``str(token)``) instead.
5354
"""
54-
return unicode(self)
55+
return u(self)
5556

5657
def _get_repr_name(self):
5758
return str(self.ttype).split('.')[-1]
5859

5960
def _get_repr_value(self):
60-
raw = unicode(self)
61+
raw = u(self)
6162
if len(raw) > 7:
6263
raw = raw[:6] + u'...'
6364
return re.sub('\s+', ' ', raw)
@@ -83,7 +84,7 @@ def match(self, ttype, values, regex=False):
8384
return type_matched
8485

8586
if regex:
86-
if isinstance(values, basestring):
87+
if isinstance(values, string_types):
8788
values = set([values])
8889

8990
if self.ttype is T.Keyword:
@@ -96,7 +97,7 @@ def match(self, ttype, values, regex=False):
9697
return True
9798
return False
9899

99-
if isinstance(values, basestring):
100+
if isinstance(values, string_types):
100101
if self.is_keyword:
101102
return values.upper() == self.normalized
102103
return values == self.value
@@ -172,7 +173,7 @@ def _to_string(self):
172173
if sys.version_info[0] == 3:
173174
return ''.join(x.value for x in self.flatten())
174175
else:
175-
return ''.join(unicode(x) for x in self.flatten())
176+
return ''.join(u(x) for x in self.flatten())
176177

177178
def _get_repr_name(self):
178179
return self.__class__.__name__
@@ -185,9 +186,9 @@ def _pprint_tree(self, max_depth=None, depth=0):
185186
pre = ' +-'
186187
else:
187188
pre = ' | '
188-
print '%s%s%d %s \'%s\'' % (indent, pre, idx,
189+
print('%s%s%d %s \'%s\'' % (indent, pre, idx,
189190
token._get_repr_name(),
190-
token._get_repr_value())
191+
token._get_repr_value()))
191192
if (token.is_group() and (max_depth is None or depth < max_depth)):
192193
token._pprint_tree(max_depth, depth + 1)
193194

@@ -285,7 +286,7 @@ def token_next_match(self, idx, ttype, value, regex=False):
285286
if not isinstance(idx, int):
286287
idx = self.token_index(idx)
287288

288-
for n in xrange(idx, len(self.tokens)):
289+
for n in range(idx, len(self.tokens)):
289290
token = self.tokens[n]
290291
if token.match(ttype, value, regex):
291292
return token
@@ -349,7 +350,7 @@ def token_index(self, token, start=0):
349350
# Performing `index` manually is much faster when starting in the middle
350351
# of the list of tokens and expecting to find the token near to the starting
351352
# index.
352-
for i in xrange(start, len(self.tokens)):
353+
for i in range(start, len(self.tokens)):
353354
if self.tokens[i] == token:
354355
return i
355356
return -1
@@ -518,7 +519,7 @@ def get_typecast(self):
518519
next_ = self.token_next(self.token_index(marker), False)
519520
if next_ is None:
520521
return None
521-
return unicode(next_)
522+
return u(next_)
522523

523524
def get_ordering(self):
524525
"""Returns the ordering or ``None`` as uppercase string."""

tests/test_filters.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
'''
66
import unittest
77

8+
from sqlparse.compat import u
89
from sqlparse.filters import StripWhitespace, Tokens2Unicode
910
from sqlparse.lexer import tokenize
1011

0 commit comments

Comments
 (0)