Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring to use Syntax Tree #18

Merged
merged 6 commits into from
Oct 19, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
temp commit
  • Loading branch information
Satoshi MATSUZAKI committed Oct 16, 2019
commit e52ead347ce0c1ddf077c9e88ce8ce53442a56ac
28 changes: 14 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ Dockerfile
$ docker build -t sqlint:latest .
...
$ docker run -it sqlint:latset /bin/bash
xxxxx:/work # python3 -m sqlint sqlint/tests/data/query001.sql
sqlint/tests/data/query001.sql:(L2, 6): comma must be head of line
sqlint/tests/data/query001.sql:(L7, 6): comma must be head of line
xxxxx:/work # python3 -m sqlint sqlint/tests/data/query005.sql
sqlint/tests/data/query005.sql:(L2, 6): comma must be head of line
sqlint/tests/data/query005.sql:(L7, 6): comma must be head of line
```

## Checking variations
Expand Down Expand Up @@ -93,17 +93,17 @@ Check if sql statement violates following rules.
## Sample

```
$ sqlint sqlint/tests/data/*
sqlint/tests/data/query001.sql:(L2, 6): comma must be head of line
sqlint/tests/data/query001.sql:(L7, 6): comma must be head of line
sqlint/tests/data/query002.sql:(L1, 1): reserved keywords must be lower case: SELECT -> select
sqlint/tests/data/query002.sql:(L3, 7): reserved keywords must be lower case: COUNT -> count
sqlint/tests/data/query003.sql:(L2, 1): indent steps must be 4 multiples (5)
sqlint/tests/data/query004.sql:(L5, 18): too many spaces
sqlint/tests/data/query005.sql:(L2, 7): whitespace must not be after bracket: (
sqlint/tests/data/query005.sql:(L2, 22): whitespace must not be before bracket: )
sqlint/tests/data/query006.sql:(L3, 8): whitespace must be after binary operator: +c
sqlint/tests/data/query006.sql:(L3, 8): whitespace must be after binary operator: b+
$ sqlint sqlint/tests/samples/*
tests/samples/query001.sql (L2, 1): indent steps must be 4 multiples, but 5 spaces
tests/samples/query002.sql (L6, 16): there are multiple whitespaces
tests/samples/query003.sql (L2, 7): whitespace must not be after bracket: (
tests/samples/query003.sql (L2, 22): whitespace must not be before bracket: )
tests/samples/query004.sql (L3, 8): whitespace must be before binary operator: b+
tests/samples/query004.sql (L3, 8): whitespace must be after binary operator: +c
tests/samples/query005.sql (L2, 6): comma must be head of line
tests/samples/query005.sql (L7, 6): comma must be head of line
tests/samples/query006.sql (L1, 1): reserved keywords must be lower case: SELECT -> select
tests/samples/query006.sql (L3, 7): reserved keywords must be lower case: Count -> count
sqlint/tests/data/query007.sql:(L8, 16): table_name must be at the same line as join context
sqlint/tests/data/query008.sql:(L6, 5): join context must be [left outer join], [inner join] or [cross join]: join
sqlint/tests/data/query008.sql:(L10, 10): join context must be [left outer join], [inner join] or [cross join]: left join
Expand Down
133 changes: 51 additions & 82 deletions src/checker/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from abc import ABCMeta, abstractmethod
from typing import Dict, List
from typing import Dict, List, Tuple

from . import violation
from .violation import Violation
Expand All @@ -11,7 +11,7 @@
class Checker(metaclass=ABCMeta):
@staticmethod
@abstractmethod
def check(tree: SyntaxTree, config: ConfigLoader) -> List[str]:
def check(tree: SyntaxTree, config: ConfigLoader) -> List[Violation]:
pass


Expand Down Expand Up @@ -66,7 +66,7 @@ def _check(node: Node, keyword_style: str) -> List[Violation]:
continue

word: str = token.word
expected: str = KeywordStyleChecker._get_expected(word, keyword_style)
expected: str = KeywordStyleChecker.get_expected(word, keyword_style)
if word != expected:
params = {'index': idx, 'actual': word, 'expected': expected}
v = violation.KeywordStyleViolation(leaf, keyword_style, **params)
Expand All @@ -78,7 +78,7 @@ def _check(node: Node, keyword_style: str) -> List[Violation]:
return violation_list

@staticmethod
def _get_expected(keyword: str, keyword_style: str) -> str:
def get_expected(keyword: str, keyword_style: str) -> str:
expected: str = keyword

if keyword_style == 'lower':
Expand Down Expand Up @@ -133,7 +133,7 @@ def _check_position(node: Node, comma_position: str) -> List[Violation]:
for idx in comma_indexes:
# If a comma is in brackets, it is appropriate not to break a line at comma.
# So determines that by counting left- and right- brackets at left-right-side.
is_open_bracket = 0 < (tokens[0: idx].count(lb) - tokens[0: idx].count(rb))
is_open_bracket = 0 < (tokens[0:idx].count(lb) - tokens[0:idx].count(rb))
is_close_bracket = 0 < (tokens[idx+1:].count(rb) - tokens[idx+1:].count(lb))

if not is_open_bracket or not is_close_bracket:
Expand Down Expand Up @@ -309,7 +309,7 @@ def check(tree: SyntaxTree, config: ConfigLoader) -> List[Violation]:
expected_list = {}
for k, vs in expected_kvs.items():
expected_list[k] = ' '.join([
KeywordStyleChecker._get_expected(v, keyword_style) for v in vs])
KeywordStyleChecker.get_expected(v, keyword_style) for v in vs])

result.extend(JoinChecker._check_context(tree.root, expected_list))

Expand All @@ -323,13 +323,15 @@ def _check_table_name(node: Node) -> List[Violation]:
for leaf in node.leaves:
for idx, token in enumerate(leaf.contents):
# ignores except join
if token.kind != Token.KEYWORD or token.word.upper() != 'JOIN':
if token.word.upper() != 'JOIN':
continue

# ignores the token next to 'JOIN' is identifier which may be table.
if idx < len(leaf.contents)-1 and leaf.contents[idx+1].kind == Token.IDENTIFIER:
if idx <= len(leaf.contents)-2 and leaf.contents[idx+2].kind == Token.IDENTIFIER:
continue

# TODO: Checks below
# TODO: SubQueries will become violation in the future.
"""
Ignores the token next to 'Join' is 'Select' (maybe SubQuery)
Examples:
Expand All @@ -344,10 +346,6 @@ def _check_table_name(node: Node) -> List[Violation]:
Join (Select id From y)
------
"""
# TODO: SubQueries will be violation in the future.
if idx < len(leaf.contents)-2 and \
('SELECT' in [leaf.contents[idx+1].word.upper(), leaf.contents[idx+2].word.upper()]):
continue

v = violation.JoinTableViolation(leaf, index=idx)
violation_list.append(v)
Expand Down Expand Up @@ -395,89 +393,60 @@ def _check_context(node: Node, expected_list: Dict[str, str]) -> List[Violation]
return violation_list


def _check_join_context(line_num, pos, tokens, token_index):
"""
valid contexts
[left outer join], [inner join] or [cross join]
Args:
line_num:
pos:
tokens:
token_index:

Returns:

"""
token = tokens[token_index]

if token.word.upper() != 'JOIN':
return []

# concat join contexts
join_contexts = [token.word]
for tk in reversed(tokens[:token_index]):
if tk.kind == Token.WHITESPACE:
continue

if tk.word.upper() in ['INNER', 'OUTER', 'LEFT', 'RIGHT', 'CROSS']:
join_contexts.insert(0, tk.word)
else:
break

join_context_str = ' '.join(join_contexts)
class LineChecker(Checker):
"""Checks violations about lines management.

if join_context_str.upper() not in ['LEFT OUTER JOIN', 'INNER JOIN', 'CROSS JOIN']:
return ['(L{}, {}): {}: {}'.format(line_num, pos, msg.MESSAGE_JOIN_CONTEXT, join_context_str)]
1. Checks whether two or more blank lines exist.

return []
2. Checks whether breaking line after specified keywords.
Examples:
--- Good ---
Select
x
From
y
------------


def _check_break_line(line_num, pos, tokens, token_index):
---- NG ----
Select x From y
------------
"""
break line at 'and', 'or', 'on' (except between A and B)
Args:
line_num:
pos:
tokens:
token_index:

Returns:

"""
token = tokens[token_index]
@staticmethod
def check(tree: SyntaxTree, config: ConfigLoader) -> List[Violation]:
result: List[Violation] = []

if token.word.upper() not in ['AND', 'OR', 'ON']:
return []
# 1. Checks whether two or more blank lines exist.
blank_count, v_list = LineChecker._check_blank_line(tree.root, blank_count=0)
if blank_count >= 2:
v_list
result.extend()

if _is_first_token(tokens, token_index):
return []
# 2. Checks whether breaking line after specified keywords.
# TODO: Implement

# if AND, search between context
if token.word.upper() == 'AND':
for tk in reversed(tokens[:token_index]):
if tk.word.upper() == 'AND':
break
if tk.word.upper() == 'BETWEEN':
return []
return result

return ['(L{}, {}): {}: {}'.format(line_num, pos, msg.MESSAGE_BREAK_LINE, token.word)]
@staticmethod
def _check_blank_line(node: Node, blank_count: int) -> Tuple[int, List[Violation]]:
violation_list: List[Violation] = []

count = len(node.contents)

def _is_first_token(tokens, token_index):
"""
Args:
tokens:
token_index:
is_blank = (count == 0)

Returns:
if count == 1 and node.contents[0].kind == Token.WHITESPACE:
violation_list.append(violation.OnlyWhitespaceViolation(node, index=0))
is_blank = True

"""
# If this line is not blank and 2 or more previous lines are blank, stack violation.
if is_blank:
blank_count += 1
elif blank_count >= 2:
violation_list.append(violation.MultiBlankLineViolation(node, index=0))

if token_index == 0:
return True
for leaf in node.leaves:
LineChecker._check_blank_line(tree.root, blank_count=0))

for token in tokens[:token_index]:
if token.kind != Token.WHITESPACE and token.kind != Token.COMMENT:
return False

return True
return blank_count, violation_list
80 changes: 5 additions & 75 deletions src/checker/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from src.config.config_loader import ConfigLoader


def check(tree: SyntaxTree, config: ConfigLoader):
def check(tree: SyntaxTree, config: ConfigLoader) -> List[Violation]:
"""Checks syntax tree and returns error messages

Args:
Expand All @@ -30,82 +30,12 @@ def check(tree: SyntaxTree, config: ConfigLoader):
# Check whether comma, which connects some columns or conditions, is head(end) of line.
base.CommaChecker,
# Check about join context
base.JoinChecker
base.JoinChecker,
# Check whether line-breaking before or after specified keywords.
base.LineChecker
]

for checker in checker_list:
violation_list.extend(checker.check(tree, config))

for vi in violation_list:
print(vi.get_message())

# parse sql context
# parsed_tokens = exec_parser(stmt)
"""
blank_line_num = 0
line_num = 1
for tokens in parsed_tokens:
# the position of a head of token in the line.
position = 1
# not suggest to break line as comma in bracket nest
bracket_nest_count = 0

# no tokens or only whitespace
if (len(tokens) == 0 or
(len(tokens) == 1 and tokens[0].kind == Token.WHITESPACE)):
blank_line_num += 1
else:
if blank_line_num >= 2:
result.append('(L{}, {}): {} ({})'.format(line_num, 0, 'too many blank lines', blank_line_num))
blank_line_num = 0

for i, token in enumerate(tokens):
# debug
# logger.debug('L{}:{}: {}'.format(line_num, position, token))

if token.kind == Token.COMMENT:
continue

if token.word == '(':
bracket_nest_count += 1
if token.word == ')':
bracket_nest_count -= 1

# Check whether a whitespace exists not before ) or after (
result.extend(_check_whitespace_brackets(line_num, position, tokens, i))

# Check whether a whitespace exist before and after binary operators
# (e.g.) =, <, >, <=. >=. <>, !=, +, -, *, /, %
result.extend(_check_whitespace_operators(line_num, position, tokens, i))

# Check whether the table name is on the same line as join context
result.extend(_check_join_table(line_num, position, tokens, i))

# Check whether join context is [left outer join], [inner join] or [cross join]
result.extend(_check_join_context(line_num, position, tokens, i))

# Check whether break line at 'and', 'or', 'on' (except between A and B)
if bracket_nest_count == 0:
result.extend(_check_break_line(line_num, position, tokens, i))

# TODO: Not Implemented yet.
# join on での不等号の順番(親テーブル < 日付)
# join on での改行
# 予約語ごとに、適切なインデントがされているか
# サブクエリは外に出す
# エイリアスが予約語と一致していないか
# Suggestion: 定数のハードコーディングをやめる

position += len(token)

line_num += 1

if blank_line_num >= 2:
result.append('(L{}, {}): {} ({})'.format(line_num, 0, 'too many blank lines', blank_line_num))

# Display 'Yey' message if there's no mistake.
if len(result) == 0:
result.append('Yey! You made no mistake🍺')

return result
"""
return violation_list
Loading