Remove buffered reading again (fixes andialbrecht#114).

andialbrecht · andialbrecht · commit 2aa7f0c68f5b · 2013-10-23T10:27:33.000+02:00
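Buffered reading causes wrong parsing with some sources (issue 114), so the lexer goes back to reading the whole stream with a single read() call.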
diff --git a/CHANGES b/CHANGES
@@ -1,3 +1,11 @@
+Development Version
+-------------------
+
+Bug Fixes
+* Removed buffered reading again; it apparently causes wrong parsing in some
+  rare cases (issue114).
+
+
 Release 0.1.9 (Sep 28, 2013)
 ----------------------------
 
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
@@ -161,9 +161,6 @@ class Lexer(object):
     stripnl = False
     tabsize = 0
     flags = re.IGNORECASE | re.UNICODE
-    DEFAULT_BUFSIZE = 4096
-    MAX_BUFSIZE = 2 ** 31
-    bufsize = DEFAULT_BUFSIZE
 
     tokens = {
         'root': [
@@ -286,18 +283,13 @@ def get_tokens_unprocessed(self, stream, stack=('root',)):
         statetokens = tokendefs[statestack[-1]]
         known_names = {}
 
-        text = stream.read(self.bufsize)
-        hasmore = len(text) == self.bufsize
+        text = stream.read()
         text = self._decode(text)
 
         while 1:
             for rexmatch, action, new_state in statetokens:
                 m = rexmatch(text, pos)
                 if m:
-                    if hasmore and m.end() == len(text):
-                        # Since this is end, token may be truncated
-                        continue
-
                     # print rex.pattern
                     value = m.group()
                     if value in known_names:
@@ -330,20 +322,8 @@ def get_tokens_unprocessed(self, stream, stack=('root',)):
                         else:
                             assert False, "wrong state def: %r" % new_state
                         statetokens = tokendefs[statestack[-1]]
-                    # reset bufsize
-                    self.bufsize = self.DEFAULT_BUFSIZE
                     break
             else:
-                if hasmore:
-                    # we have no match, increase bufsize to parse lengthy
-                    # tokens faster (see #86).
-                    self.bufsize = min(self.bufsize * 2, self.MAX_BUFSIZE)
-                    buf = stream.read(self.bufsize)
-                    hasmore = len(buf) == self.bufsize
-                    text = text[pos:] + self._decode(buf)
-                    pos = 0
-                    continue
-
                 try:
                     if text[pos] == '\n':
                         # at EOL, reset state to "root"