-
-
Notifications
You must be signed in to change notification settings - Fork 18
/
merger.py
432 lines (389 loc) · 16.9 KB
/
merger.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
import itertools
from dataclasses import dataclass
from typing import Iterable, List, Optional, Tuple
from sqlfmt.comment import Comment
from sqlfmt.exception import CannotMergeException, SqlfmtSegmentError
from sqlfmt.line import Line
from sqlfmt.mode import Mode
from sqlfmt.node import Node
from sqlfmt.operator_precedence import OperatorPrecedence
from sqlfmt.segment import Segment, create_segments_from_lines
@dataclass
class LineMerger:
mode: Mode
def create_merged_line(self, lines: List[Line]) -> List[Line]:
"""
Returns a new line by merging together all nodes in lines. Raises an
exception if the returned line would be too long, empty, or the nodes in
any of the lines violate the rules in _raise_unmergeable.
"""
if len(lines) <= 1:
return lines
nodes, comments = self._extract_components(lines)
merged_line = Line.from_nodes(
previous_node=lines[0].previous_node,
nodes=nodes,
comments=comments,
)
if merged_line.is_too_long(self.mode.line_length):
raise CannotMergeException("Merged line is too long")
# add in any leading or trailing blank lines
leading_blank_lines = self._extract_leading_blank_lines(lines)
trailing_blank_lines = list(
reversed(self._extract_leading_blank_lines(reversed(lines)))
)
return leading_blank_lines + [merged_line] + trailing_blank_lines
def safe_create_merged_line(self, lines: List[Line]) -> List[Line]:
try:
return self.create_merged_line(lines)
except CannotMergeException:
return lines
@classmethod
def _extract_components(
cls, lines: Iterable[Line]
) -> Tuple[List[Node], List[Comment]]:
"""
Given a list of lines, return 2 components:
1. list of all nodes in those lines, with only a single trailing newline
2. list of all comments in all of those lines
Raise CannotMergeException if lines contain nodes that cannot
be merged.
"""
nodes: List[Node] = []
comments: List[Comment] = []
final_newline: Optional[Node] = None
allow_multiline_jinja = True
has_multiline_jinja = False
has_inline_comment_above = False
for line in lines:
# only merge lines with comments if it's a standalone comment
# above the first line or an inline comment after the last
# line
if line.comments:
if has_inline_comment_above:
raise CannotMergeException(
"Can't merge lines with inline comments and other comments"
)
elif any(
[comment.is_databricks_query_hint for comment in line.comments]
):
raise CannotMergeException(
"Can't merge lines with a databricks type hint comment"
)
elif (
len(line.comments) == 1
and len(line.nodes) > 1
and (
line.comments[0].is_inline
or line.comments[0].previous_node == line.nodes[-2]
)
):
# this is a comment that must be rendered inline,
# so it'll probably block merging unless the
# next line is just a comma
has_inline_comment_above = True
elif len(nodes) == 1 and nodes[0].is_operator:
# if source has standalone operators, we can merge
# the operator into the contents, even if there is
# a comment in the way
pass
elif nodes:
raise CannotMergeException(
"Can't merge lines with standalone comments unless the "
"comments are above the first line"
)
# make an exception for inline comments followed by
# a lonely comma (e.g., leading commas with inline comments)
elif has_inline_comment_above:
if not (line.is_standalone_comma or line.is_blank_line):
raise CannotMergeException(
"Can't merge lines with inline comments unless "
"the following line is a single standalone comma "
"or a blank line"
)
if has_multiline_jinja and not (
line.starts_with_operator or line.starts_with_comma
):
raise CannotMergeException(
"Can't merge lines containing multiline nodes"
)
# skip over newline nodes
content_nodes = [
cls._raise_unmergeable(node, allow_multiline_jinja)
for node in line.nodes
if not node.is_newline
]
if content_nodes:
final_newline = line.nodes[-1]
nodes.extend(content_nodes)
# we can merge a line containing multiline jinja
# into a preceding line iff:
# the multiline node is on the second line and follows a
# standalone operator
if not (
allow_multiline_jinja
and len(content_nodes) == 1
and content_nodes[0].is_operator
):
allow_multiline_jinja = False
# we can merge a line into a preceding line that
# contains multiline jinja iff:
# the line starts with an operator or a comma
has_multiline_jinja = any(
[node.is_multiline_jinja for node in content_nodes]
)
comments.extend(line.comments)
if not nodes or not final_newline:
raise CannotMergeException("Can't merge only whitespace/newlines")
nodes.append(final_newline)
return nodes, comments
@staticmethod
def _raise_unmergeable(node: Node, allow_multiline_jinja: bool) -> Node:
"""
Raises a CannotMergeException if the node cannot be merged. Otherwise
returns the node
"""
if node.formatting_disabled:
raise CannotMergeException(
"Can't merge lines containing disabled formatting"
)
elif node.divides_queries:
raise CannotMergeException(
"Can't merge multiple queries onto a single line"
)
elif node.is_multiline_jinja and not allow_multiline_jinja:
raise CannotMergeException("Can't merge lines containing multiline nodes")
else:
return node
@staticmethod
def _extract_leading_blank_lines(lines: Iterable[Line]) -> List[Line]:
leading_blank_lines: List[Line] = []
for line in lines:
if line.is_blank_line:
leading_blank_lines.append(line)
else:
break
return leading_blank_lines
def maybe_merge_lines(self, lines: List[Line]) -> List[Line]:
"""
Tries to merge lines into a single line; if that fails,
splits lines into segments of equal depth, merges
runs of operators at that depth, and then recurses into
each segment
Returns a new list of Lines
"""
if not lines or all([line.formatting_disabled for line in lines]):
return lines
try:
merged_lines = self.create_merged_line(lines)
except CannotMergeException:
merged_lines = []
# doesn't fit onto a single line, so split into
# segments at the depth of lines[0]
segments = create_segments_from_lines(lines)
# if a segment starts with a standalone operator,
# the first two lines of that segment should likely
# be merged before doing anything else
segments = self._fix_standalone_operators(segments)
if len(segments) > 1:
# merge together segments of equal depth that are
# joined by operators
segments = self._maybe_merge_operators(
segments, OperatorPrecedence.tiers()
)
# some operators really should not be by themselves
# so if their segments are too long to be merged,
# we merge just their first line onto the prior segment
segments = self._maybe_stubbornly_merge(segments)
# then recurse into each segment and try to merge lines
# within individual segments
for segment in segments:
merged_lines.extend(self.maybe_merge_lines(segment))
# if there was only a single segment at the depth of the
# top line, we need to move down one line and try again.
# Because of the structure of a well-split set of lines,
# in this case moving down one line is guaranteed to move
# us in one depth.
# if the final line of the segment matches the top line,
# we need to strip that off so we only segment the
# indented lines
else:
only_segment = segments[0]
try:
_, i = only_segment.head
except SqlfmtSegmentError:
merged_lines.extend(only_segment)
else:
merged_lines.extend(only_segment[: i + 1])
for segment in only_segment.split_after(i):
merged_lines.extend(self.maybe_merge_lines(segment))
return merged_lines
def _fix_standalone_operators(self, segments: List[Segment]) -> List[Segment]:
"""
If the first line of a segment is a standalone operator,
we should try to merge the first two lines together before
doing anything else
"""
for segment in segments:
try:
head, i = segment.head
if head.is_standalone_operator:
remainder_after_operator = Segment(segment[i + 1 :])
_, j = remainder_after_operator.head
try:
merged_lines = self.create_merged_line(segment[: i + j + 2])
segment[: i + j + 2] = merged_lines
except CannotMergeException:
pass
except SqlfmtSegmentError:
pass
return segments
def _maybe_merge_operators(
self,
segments: List[Segment],
op_tiers: List[OperatorPrecedence],
) -> List[Segment]:
"""
Tries to merge runs of segments that start with operators into previous
segments. Operators have a priority that determines a sort of hierarchy;
if we can't merge a whole run of operators, we increase the priority to
create shorter runs that can be merged
"""
if len(segments) <= 1 or not op_tiers:
return segments
head = 0
new_segments: List[Segment] = []
precedence = op_tiers.pop()
for i, segment in enumerate(segments[1:], start=1):
if not self._segment_continues_operator_sequence(segment, precedence):
new_segments.extend(
self._try_merge_operator_segments(segments[head:i], op_tiers.copy())
)
head = i
# we need to try one more time to merge everything after head
else:
new_segments.extend(
self._try_merge_operator_segments(segments[head:], op_tiers.copy())
)
return new_segments
@classmethod
def _segment_continues_operator_sequence(
cls, segment: Segment, max_precedence: OperatorPrecedence
) -> bool:
"""
Returns true if the first line of the segment is part
of a sequence of operators of priority <= max_priority
"""
try:
line, _ = segment.head
except SqlfmtSegmentError:
# if a segment is blank, keep scanning
return True
else:
return (
line.starts_with_operator
and not line.previous_token_is_comma
and OperatorPrecedence.from_node(line.nodes[0]) <= max_precedence
) or line.starts_with_comma
def _try_merge_operator_segments(
self, segments: List[Segment], op_tiers: List[OperatorPrecedence]
) -> List[Segment]:
"""
Attempts to merge segments into a single line; if that fails,
recurses at a lower operator priority
"""
if len(segments) <= 1:
return segments
try:
new_segments = [
Segment(self.create_merged_line(list(itertools.chain(*segments))))
]
except CannotMergeException:
new_segments = self._maybe_merge_operators(segments, op_tiers)
return new_segments
def _maybe_stubbornly_merge(self, segments: List[Segment]) -> List[Segment]:
"""
We prefer some operators, like `as`, `over()`, `exclude()`, and
array or dictionary accessing with `[]` to be
forced onto the prior line, even if the contents of their brackets
don't fit there. This is also true for most operators that open
a bracket, like `in ()` or `+ ()`, as long as the preceding segment
does not also start with an operator.
This method scans for segments that start with
such operators and partially merges those segments with the prior
segments by calling _stubbornly_merge()
"""
if len(segments) <= 1:
return segments
new_segments = [segments[0]]
# first stubborn-merge all p0 operators
for segment in segments[1:]:
if (
# always stubbornly merge P0 operators (e.g., `over`)
self._segment_continues_operator_sequence(
segment, max_precedence=OperatorPrecedence.OTHER_TIGHT
)
):
new_segments = self._stubbornly_merge(new_segments, segment)
else:
new_segments.append(segment)
if len(new_segments) == 1:
return new_segments
# next, stubbon-merge qualifying p1 operators
segments = new_segments
new_segments = [segments[0]]
starts_with_p1_operator = [
self._segment_continues_operator_sequence(
segment, max_precedence=OperatorPrecedence.COMPARATORS
)
for segment in segments
]
for i, segment in enumerate(segments[1:], start=1):
if (
not starts_with_p1_operator[i - 1]
and starts_with_p1_operator[i]
and Segment(self.safe_create_merged_line(segment)).tail_closes_head
):
new_segments = self._stubbornly_merge(new_segments, segment)
else:
new_segments.append(segment)
return new_segments
def _stubbornly_merge(
self, prev_segments: List[Segment], segment: Segment
) -> List[Segment]:
"""
Attempts several different methods of merging the last segment in
new_segments and segment. Returns a list of segments that represent the
best possible merger of those segments
"""
new_segments = prev_segments.copy()
prev_segment = new_segments.pop()
try:
head, i = segment.head
except SqlfmtSegmentError:
new_segments.extend([prev_segment, segment])
return new_segments
# try to merge the first line of this segment with the previous segment
try:
prev_segment = Segment(self.create_merged_line(prev_segment + [head]))
prev_segment.extend(segment[i + 1 :])
new_segments.append(prev_segment)
except CannotMergeException:
# try to add this segment to the last line of the previous segment
last_line, k = prev_segment.tail
try:
new_last_lines = self.create_merged_line([last_line] + segment)
prev_segment[-(k + 1) :] = new_last_lines
new_segments.append(prev_segment)
except CannotMergeException:
# try to add just the first line of this segment to the last
# line of the previous segment
try:
new_last_lines = self.create_merged_line([last_line, head])
prev_segment[-(k + 1) :] = new_last_lines
prev_segment.extend(segment[i + 1 :])
new_segments.append(prev_segment)
except CannotMergeException:
# give up and just return the original segments
new_segments.extend([prev_segment, segment])
return new_segments