@@ -139,24 +139,23 @@ def process(self, stmt, depth=0):
139139 stmt .tokens .pop (- 1 )
140140
141141
142- class SpacesAroundOperatorsFilter :
142+ class SpacesAroundOperatorsFilter ( object ) :
143143 whitelist = (sql .Identifier , sql .Comparison , sql .Where )
144144
145145 def _process (self , tlist ):
146146 def next_token (idx ):
147- # HACK: distinguish between real wildcard from multiplication operator
148- return tlist . token_next_by_type ( idx , ( T . Operator , T . Comparison , T . Wildcard ))
147+ return tlist . token_next_by ( t = ( T . Operator , T . Comparison ), idx = idx )
148+
149149 idx = 0
150150 token = next_token (idx )
151151 while token :
152152 idx = tlist .token_index (token )
153153 if idx > 0 and tlist .tokens [idx - 1 ].ttype != T .Whitespace :
154- tlist .tokens .insert (idx , sql .Token (T .Whitespace , ' ' )) # insert before
154+ # insert before
155+ tlist .tokens .insert (idx , sql .Token (T .Whitespace , ' ' ))
155156 idx += 1
156157 if idx < len (tlist .tokens ) - 1 :
157- if token .ttype == T .Wildcard and tlist .tokens [idx + 1 ].match (T .Punctuation , ',' ):
158- pass # this must have been a real wildcard, not multiplication
159- elif tlist .tokens [idx + 1 ].ttype != T .Whitespace :
158+ if tlist .tokens [idx + 1 ].ttype != T .Whitespace :
160159 tlist .tokens .insert (idx + 1 , sql .Token (T .Whitespace , ' ' ))
161160
162161 idx += 1
@@ -165,7 +164,7 @@ def next_token(idx):
165164 for sgroup in tlist .get_sublists ():
166165 self ._process (sgroup )
167166
def process(self, stmt):
    """Filter entry point: insert spaces around operators in *stmt* in place.

    Delegates to ``_process``, which walks the token tree recursively.
    The ``stack`` parameter of the old API was dropped in this revision.
    """
    self._process(stmt)
170169
171170
@@ -365,16 +364,16 @@ def process(self, stmt):
365364 self ._last_stmt = stmt
366365
367366
368- class AlignedIndentFilter :
369- join_words = r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?|(CROSS\s+|NATURAL\s+)?)?JOIN\b '
370- split_words = (
371- 'FROM' ,
372- join_words , 'ON ' ,
373- 'WHERE' , 'AND' , 'OR ' ,
374- 'GROUP ' , 'HAVING ' , 'LIMIT ' ,
375- 'ORDER ' , 'UNION ' , 'VALUES ' ,
376- 'SET ' , 'BETWEEN ' , 'EXCEPT ' ,
377- )
367+ class AlignedIndentFilter ( object ) :
368+ join_words = ( r'((LEFT\s+|RIGHT\s+|FULL\s+)?'
369+ r'(INNER\s+|OUTER\s+|STRAIGHT\s+)?|'
370+ r'(CROSS\s+|NATURAL\s+)?)?JOIN\b' )
371+ split_words = ( 'FROM ' ,
372+ join_words , 'ON ' ,
373+ 'WHERE ' , 'AND ' , 'OR ' ,
374+ 'GROUP ' , 'HAVING ' , 'LIMIT ' ,
375+ 'ORDER ' , 'UNION ' , 'VALUES ' ,
376+ 'SET' , 'BETWEEN' , 'EXCEPT' )
378377
379378 def __init__ (self , char = ' ' , line_width = None ):
380379 self .char = char
@@ -384,43 +383,51 @@ def newline(self):
384383 return sql .Token (T .Newline , '\n ' )
385384
def whitespace(self, chars=0, newline_before=False, newline_after=False):
    """Build a whitespace token of ``chars`` copies of ``self.char``.

    :param chars: number of fill characters (``self.char``) to emit.
    :param newline_before: prepend a literal newline when True.
    :param newline_after: append a literal newline when True.
    :returns: a single ``sql.Token`` with ttype ``T.Whitespace``.
    """
    # NOTE(review): the scraped source renders the literal as "'\n '";
    # treated here as '\n' — the trailing space is scraper token-spacing.
    return sql.Token(T.Whitespace, ('\n' if newline_before else '') +
                     self.char * chars +
                     ('\n' if newline_after else ''))
390388
def _process_statement(self, tlist, base_indent=0):
    """Process a whole statement.

    Drops a single leading whitespace token when we are at the outermost
    level (``base_indent == 0``), then hands the remaining tokens to the
    generic ``_process`` dispatcher as a fresh ``TokenList``.

    :returns: the processed ``sql.TokenList``.
    """
    # NOTE(review): is_whitespace is *called* here — assumes a sqlparse
    # version where it is a method, not a property; confirm against the
    # pinned sqlparse release.
    if tlist.tokens[0].is_whitespace() and base_indent == 0:
        tlist.tokens.pop(0)

    # process the main query body
    return self._process(sql.TokenList(tlist.tokens),
                         base_indent=base_indent)
397396
def _process_parenthesis(self, tlist, base_indent=0):
    """Re-indent the contents of a parenthesized subquery.

    Only parentheses containing a ``SELECT`` (i.e. subqueries) are
    touched; anything else is returned unchanged. The body is indented
    past the aligned keyword column, and the closing parenthesis is
    de-indented onto its own line.

    :returns: *tlist*, modified in place.
    """
    if not tlist.token_next_by(m=(T.DML, 'SELECT')):
        # if this isn't a subquery, don't re-indent
        return tlist

    # add two for the space and parens
    sub_indent = base_indent + self._max_kwd_len + 2
    tlist.insert_after(tlist.tokens[0],
                       self.whitespace(sub_indent, newline_before=True))
    # de-indent the last parenthesis
    tlist.insert_before(tlist.tokens[-1],
                        self.whitespace(sub_indent - 1,
                                        newline_before=True))

    # process the inside of the parentheses, keeping the literal
    # open/close paren tokens on either side of the recursed body
    tlist.tokens = (
        [tlist.tokens[0]] +
        self._process(sql.TokenList(tlist._groupable_tokens),
                      base_indent=sub_indent).tokens +
        [tlist.tokens[-1]]
    )
    return tlist
415419
416420 def _process_identifierlist (self , tlist , base_indent = 0 ):
417421 # columns being selected
418422 new_tokens = []
419- identifiers = filter (lambda t : t .ttype not in (T .Punctuation , T .Whitespace , T .Newline ), tlist .tokens )
423+ identifiers = list (filter (
424+ lambda t : t .ttype not in (T .Punctuation , T .Whitespace , T .Newline ),
425+ tlist .tokens ))
420426 for i , token in enumerate (identifiers ):
421427 if i > 0 :
422428 new_tokens .append (self .newline ())
423- new_tokens .append (self .whitespace (self ._max_kwd_len + base_indent + 1 ))
429+ new_tokens .append (
430+ self .whitespace (self ._max_kwd_len + base_indent + 1 ))
424431 new_tokens .append (token )
425432 if i < len (identifiers ) - 1 :
426433 # if not last column in select, add a comma seperator
@@ -437,10 +444,11 @@ def _process_case(self, tlist, base_indent=0):
437444 case_offset = len ('when ' )
438445 cases = tlist .get_cases (skip_ws = True )
439446 # align the end as well
440- end_token = tlist .token_next_match ( 0 , T .Keyword , 'END' )
447+ end_token = tlist .token_next_by ( m = ( T .Keyword , 'END' ) )
441448 cases .append ((None , [end_token ]))
442449
443- condition_width = max (len (' ' .join (map (str , cond ))) for cond , value in cases if cond )
450+ condition_width = max (
451+ len (' ' .join (map (str , cond ))) for cond , value in cases if cond )
444452 for i , (cond , value ) in enumerate (cases ):
445453 if cond is None : # else or end
446454 stmt = value [0 ]
@@ -449,17 +457,20 @@ def _process_case(self, tlist, base_indent=0):
449457 stmt = cond [0 ]
450458 line = cond + value
451459 if i > 0 :
452- tlist .insert_before (stmt , self .whitespace (base_offset + case_offset - len (str (stmt ))))
460+ tlist .insert_before (stmt , self .whitespace (
461+ base_offset + case_offset - len (str (stmt ))))
453462 if cond :
454- tlist .insert_after (cond [- 1 ], self .whitespace (condition_width - len (' ' .join (map (str , cond )))))
463+ tlist .insert_after (cond [- 1 ], self .whitespace (
464+ condition_width - len (' ' .join (map (str , cond )))))
455465
456466 if i < len (cases ) - 1 :
457467 # if not the END add a newline
458468 tlist .insert_after (line [- 1 ], self .newline ())
459469
460470 def _process_substatement (self , tlist , base_indent = 0 ):
461471 def _next_token (i ):
462- t = tlist .token_next_match (i , T .Keyword , self .split_words , regex = True )
472+ t = tlist .token_next_by (m = (T .Keyword , self .split_words , True ),
473+ idx = i )
463474 # treat "BETWEEN x and y" as a single statement
464475 if t and t .value .upper () == 'BETWEEN' :
465476 t = _next_token (tlist .token_index (t ) + 1 )
@@ -470,35 +481,35 @@ def _next_token(i):
470481 idx = 0
471482 token = _next_token (idx )
472483 while token :
484+ # joins are special case. only consider the first word as aligner
473485 if token .match (T .Keyword , self .join_words , regex = True ):
474- # joins are a special case. we only consider the first word of the join as the aligner
475486 token_indent = len (token .value .split ()[0 ])
476487 else :
477488 token_indent = len (str (token ))
478- tlist .insert_before (token , self .whitespace (self ._max_kwd_len - token_indent + base_indent , newline_before = True ))
489+ tlist .insert_before (token , self .whitespace (
490+ self ._max_kwd_len - token_indent + base_indent ,
491+ newline_before = True ))
479492 next_idx = tlist .token_index (token ) + 1
480493 token = _next_token (next_idx )
481494
482495 # process any sub-sub statements
483496 for sgroup in tlist .get_sublists ():
484497 prev_token = tlist .token_prev (tlist .token_index (sgroup ))
485498 indent_offset = 0
499+ # HACK: make "group/order by" work. Longer than _max_kwd_len.
486500 if prev_token and prev_token .match (T .Keyword , 'BY' ):
487- # HACK: make "group by" and "order by" indents work. these are longer than _max_kwd_len.
488501 # TODO: generalize this
489502 indent_offset = 3
490503 self ._process (sgroup , base_indent = base_indent + indent_offset )
491504 return tlist
492505
493- def _process (self , tlist , base_indent = 0 , verbose = False ):
506+ def _process (self , tlist , base_indent = 0 ):
494507 token_name = tlist .__class__ .__name__ .lower ()
495508 func_name = '_process_%s' % token_name
496509 func = getattr (self , func_name , self ._process_substatement )
497- if verbose :
498- print func .__name__ , token_name , str (tlist )
499510 return func (tlist , base_indent = base_indent )
500511
def process(self, stmt):
    """Filter entry point: align-indent *stmt* in place.

    Delegates to the ``_process`` dispatcher. The ``stack`` parameter of
    the old API was dropped in this revision.
    """
    self._process(stmt)
503514
504515
0 commit comments