Skip to content

Commit

Permalink
fix #201: standardize whitespace inside tokens (#225)
Browse files Browse the repository at this point in the history
* fix #201: standardize whitespace inside tokens

* chore: update changelog and primer
  • Loading branch information
tconbeer authored Aug 2, 2022
1 parent a00a4f5 commit fb3253f
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 279 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

### Formatting Changes + Bug Fixes

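- sqlfmt now standardizes whitespace inside multi-word tokens: any run of internal whitespace (including newlines) is replaced with a single space, so `union\n all` is formatted as `union all` ([#201](https://github.com/tconbeer/sqlfmt/issues/201))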

## [0.10.0] - 2022-08-02

### Features
Expand Down
351 changes: 79 additions & 272 deletions poetry.lock

Large diffs are not rendered by default.

11 changes: 6 additions & 5 deletions src/sqlfmt/node_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def create_node(self, token: Token, previous_node: Optional[Node]) -> Node:

prev_token, extra_whitespace = get_previous_token(previous_node)
prefix = self.whitespace(token, prev_token, extra_whitespace)
value = self.capitalize(token)
value = self.standardize_value(token)

if token.type in (TokenType.FMT_OFF, TokenType.DATA):
formatting_disabled = True
Expand Down Expand Up @@ -214,10 +214,11 @@ def whitespace(
else:
return SPACE

def capitalize(self, token: Token) -> str:
def standardize_value(self, token: Token) -> str:
"""
Proper style is to lowercase all keywords, statements, and names.
If DB identifiers can't be lowercased, they should be quoted.
Tokens that are words (not symbols) and aren't jinja
or comments should be lowercased and have any internal
whitespace replaced with a single space
"""
if token.type in (
TokenType.UNTERM_KEYWORD,
Expand All @@ -230,7 +231,7 @@ def capitalize(self, token: Token) -> str:
TokenType.BOOLEAN_OPERATOR,
TokenType.SET_OPERATOR,
):
return token.token.lower()
return " ".join(token.token.lower().split())
elif token.type == TokenType.NAME and not self.case_sensitive_names:
return token.token.lower()
else:
Expand Down
4 changes: 2 additions & 2 deletions src/sqlfmt_primer/primer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def get_projects() -> List[SQLProject]:
SQLProject(
name="gitlab",
git_url="https://github.com/tconbeer/gitlab-analytics-sqlfmt.git",
git_ref="4d75449eeb2d7a97dbb63fae4458de19dd2a382a", # sqlfmt 0d3b30e
git_ref="30645ca4b8a4723b9c4d4177e0f0f34ac7b40bbf", # sqlfmt 49fa97e
expected_changed=4,
expected_unchanged=2413,
expected_errored=0,
Expand All @@ -39,7 +39,7 @@ def get_projects() -> List[SQLProject]:
SQLProject(
name="rittman",
git_url="https://github.com/tconbeer/rittman_ra_data_warehouse.git",
git_ref="0d5492b2526d5830a94037f336223fd4e0e3fbd4", # sqlfmt 0d3b30e
git_ref="44bb35dd4db50e4113cac8cb8d3521da8d998e1d", # sqlfmt 49fa97e
expected_changed=0,
expected_unchanged=307,
expected_errored=4, # true mismatching brackets
Expand Down
29 changes: 29 additions & 0 deletions tests/data/unformatted/117_whitespace_in_tokens.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/* a multiline
comment
*/
select
top
25
*
from "my table"
where
id not
in (
1, 2, 3
)
union
all
select
distinct
*
from "your table"
)))))__SQLFMT_OUTPUT__(((((
/* a multiline
comment
*/
select top 25 *
from "my table"
where id not in (1, 2, 3)
union all
select distinct *
from "your table"
23 changes: 23 additions & 0 deletions tests/unit_tests/test_node_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,3 +317,26 @@ def test_bracket_whitespace(default_mode: Mode, source_string: str) -> None:
).parse_query(source_string=source_string)
parsed_string = "".join(str(line) for line in q.lines)
assert source_string == parsed_string


@pytest.mark.parametrize(
    "source_string,expected_string",
    [
        ("union\n\nall", "union all\n"),
        ("union\n all", "union all\n"),
        ("union all", "union all\n"),
        ("select\ntop\n10", "select top 10\n"),
        ("group by", "group by\n"),
        ("not\nin", "not in\n"),
        ("not\n similar \n to", "not similar to\n"),
        ("right\n outer \n join", "right outer join\n"),
    ],
)
def test_internal_whitespace(
    default_mode: Mode, source_string: str, expected_string: str
) -> None:
    """
    Parse each source string with the default mode's analyzer and check
    that the rendered lines equal the expected string, in which the
    whitespace between the words of a token is a single space.
    """
    analyzer = default_mode.dialect.initialize_analyzer(
        line_length=default_mode.line_length
    )
    query = analyzer.parse_query(source_string=source_string)
    # Render the parsed query back to text, one str() per line, no separator.
    rendered = "".join(map(str, query.lines))
    assert rendered == expected_string

0 comments on commit fb3253f

Please sign in to comment.