Skip to content

Commit 7abd76f

Browse files
authored
Fix: Do not split the between operator's and (#127)
* fix: prevent line splitting on between's and, closes #124 * refactor: modularize node logic * chore: update changelog * chore: fix changelog * refactor: modularize logic
1 parent 1de65be commit 7abd76f

File tree

9 files changed

+161
-8
lines changed

9 files changed

+161
-8
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ All notable changes to this project will be documented in this file.
77
### Fixes
88

99
- adds support for numbered field references (e.g., `$1`) and snowflake stages (`@my_stage`) as identifiers
10+
- do not split lines before the `between` operator's `and` keyword ([#124](https://github.com/tconbeer/sqlfmt/issues/124) - thank you [@WestComputing](https://github.com/WestComputing)!)
1011

1112
## [0.5.0] - 2022-02-02
1213

src/sqlfmt/node.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,10 @@ def is_operator(self) -> bool:
162162
or self.is_multiplication_star
163163
)
164164

165+
@property
166+
def is_boolean_operator(self) -> bool:
167+
return self.token.type == TokenType.BOOLEAN_OPERATOR
168+
165169
@property
166170
def is_multiplication_star(self) -> bool:
167171
"""
@@ -180,6 +184,39 @@ def is_multiplication_star(self) -> bool:
180184
in (TokenType.UNTERM_KEYWORD, TokenType.COMMA, TokenType.DOT)
181185
)
182186

187+
@property
188+
def is_the_between_operator(self) -> bool:
189+
"""
190+
True if this node is a WORD_OPERATOR with the value "between"
191+
"""
192+
return self.token.type == TokenType.WORD_OPERATOR and self.value == "between"
193+
194+
@cached_property
195+
def has_preceding_between_operator(self) -> bool:
196+
"""
197+
True if this node has a preceding "between" operator at the same depth
198+
"""
199+
prev = self.previous_node.previous_node if self.previous_node else None
200+
while prev and prev.depth >= self.depth:
201+
if prev.depth == self.depth and prev.is_the_between_operator:
202+
return True
203+
elif prev.depth == self.depth and prev.is_boolean_operator:
204+
break
205+
else:
206+
prev = prev.previous_node
207+
return False
208+
209+
@property
210+
def is_the_and_after_the_between_operator(self) -> bool:
211+
"""
212+
True if this node is a BOOLEAN_OPERATOR with the value "and" immediately
213+
following a "between" operator
214+
"""
215+
if not self.is_boolean_operator or self.value != "and":
216+
return False
217+
else:
218+
return self.has_preceding_between_operator
219+
183220
@property
184221
def is_low_priority_merge_operator(self) -> bool:
185222
return self.token.type in (TokenType.BOOLEAN_OPERATOR, TokenType.ON)

src/sqlfmt/splitter.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -50,15 +50,22 @@ def maybe_split_before(self, node: Node) -> bool:
5050
):
5151
return True
5252
# split before any operator unless the previous node is a closing
53-
# bracket or statement
54-
elif (
53+
# bracket or statement, or it is the "and" following a "between"
54+
else:
55+
return self.maybe_split_before_operator(node)
56+
57+
def maybe_split_before_operator(self, node: Node) -> bool:
58+
"""
59+
Return true if this node is an operator and it has the
60+
right context for splitting before it. We do not split before
61+
operators that follow closing brackets
62+
"""
63+
return (
5564
node.is_operator
56-
and node.previous_node
65+
and node.previous_node is not None
5766
and not node.previous_node.is_closing_bracket
58-
):
59-
return True
60-
else:
61-
return False
67+
and not node.is_the_and_after_the_between_operator
68+
)
6269

6370
def maybe_split_after(self, node: Node) -> bool:
6471
"""
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
select
2+
radio,
3+
mcc,
4+
net as mnc,
5+
area as lac,
6+
cell % 65536 as cid,
7+
cell / 65536 as rnc,
8+
cell as long_cid,
9+
lon,
10+
lat
11+
from
12+
towershift
13+
where
14+
radio != 'CDMA'
15+
and mcc between 200 and 799 and net between 1 and 999 and area between 0 and 65535
16+
and cell between 0 and 268435455 and lon between -180 and 180
17+
and lat between -90 and 90
18+
)))))__SQLFMT_OUTPUT__(((((
19+
select
20+
radio,
21+
mcc,
22+
net as mnc,
23+
area as lac,
24+
cell % 65536 as cid,
25+
cell / 65536 as rnc,
26+
cell as long_cid,
27+
lon,
28+
lat
29+
from towershift
30+
where
31+
radio != 'CDMA'
32+
and mcc between 200 and 799
33+
and net between 1 and 999
34+
and area between 0 and 65535
35+
and cell between 0 and 268435455
36+
and lon between -180 and 180
37+
and lat between -90 and 90
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
where
2+
radio != 'CDMA'
3+
and mcc between 200 and 799
4+
and net between 1+1+1+1+1+1+1+1+1+1+1+1+1+1+1 and 999
5+
and area between smallest_area and biggest_area
6+
and cell between (select min(cell_number) from numbers) and (
7+
select max(cell_number) from numbers
8+
) and lon between -180 and 180
9+
or lat between -90 and 90

tests/functional_tests/test_end_to_end.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def test_end_to_end_check_unformatted(
111111
result = sqlfmt_runner.invoke(sqlfmt_main, args=args)
112112

113113
assert result
114-
assert "13 files" in result.stderr
114+
assert "14 files" in result.stderr
115115
assert "failed formatting check" in result.stderr
116116

117117
if "-q" in options or "--quiet" in options:

tests/functional_tests/test_general_formatting.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
"unformatted/108_test_block.sql",
2323
"unformatted/109_lateral_flatten.sql",
2424
"unformatted/110_other_identifiers.sql",
25+
"unformatted/111_chained_boolean_between.sql",
2526
"unformatted/200_base_model.sql",
2627
"unformatted/300_jinjafmt.sql",
2728
],

tests/unit_tests/test_node.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,3 +154,39 @@ def test_from_token_raises_bracket_error_on_jinja_block_end() -> None:
154154
)
155155
with pytest.raises(SqlfmtBracketError):
156156
_ = Node.from_token(t, previous_node=None)
157+
158+
159+
@pytest.mark.parametrize(
160+
"token,result", [("between", True), ("BETWEEN", True), ("like", False)]
161+
)
162+
def test_is_the_between_operator(token: str, result: bool) -> None:
163+
t = Token(
164+
type=TokenType.WORD_OPERATOR,
165+
prefix="",
166+
token=token,
167+
spos=0,
168+
epos=7,
169+
)
170+
n = Node.from_token(t, previous_node=None)
171+
assert n.is_the_between_operator is result
172+
173+
174+
def test_is_the_and_after_the_between_operator(default_mode: Mode) -> None:
175+
source_string, _ = read_test_data(
176+
"unit_tests/test_line/test_is_the_and_after_the_between_operator.sql"
177+
)
178+
q = default_mode.dialect.initialize_analyzer(
179+
line_length=default_mode.line_length
180+
).parse_query(source_string=source_string)
181+
182+
and_nodes = [
183+
node for node in q.nodes if node.token.type == TokenType.BOOLEAN_OPERATOR
184+
]
185+
other_nodes = [
186+
node for node in q.nodes if node.token.type != TokenType.BOOLEAN_OPERATOR
187+
]
188+
boolean_ands = and_nodes[::2]
189+
between_ands = and_nodes[1::2]
190+
assert all([not n.is_the_and_after_the_between_operator for n in boolean_ands])
191+
assert all([n.is_the_and_after_the_between_operator for n in between_ands])
192+
assert all([not n.is_the_and_after_the_between_operator for n in other_nodes])

tests/unit_tests/test_splitter.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,3 +206,28 @@ def test_jinja_block_split(splitter: LineSplitter) -> None:
206206
actual_result = "".join([str(line).lstrip() for line in split_lines])
207207

208208
assert actual_result == expected_result
209+
210+
211+
def test_split_at_and(splitter: LineSplitter) -> None:
212+
source_string = "select 1 where a between b and c and d between e and f and a < b\n"
213+
raw_query = splitter.mode.dialect.initialize_analyzer(
214+
splitter.mode.line_length
215+
).parse_query(source_string)
216+
217+
split_lines: List[Line] = []
218+
for raw_line in raw_query.lines:
219+
split_lines.extend(splitter.maybe_split(raw_line))
220+
221+
actual_result = [str(line) for line in split_lines]
222+
expected_result = [
223+
"select\n",
224+
" 1\n",
225+
"where\n",
226+
" a\n",
227+
" between b and c\n",
228+
" and d\n",
229+
" between e and f\n",
230+
" and a\n",
231+
" < b\n",
232+
]
233+
assert actual_result == expected_result

0 commit comments

Comments
 (0)