Skip to content

Commit

Permalink
fix tconbeer#471: handle nested dictionary in jinja expression
Browse files Browse the repository at this point in the history
  • Loading branch information
benjamin-awd authored and tconbeer committed Oct 20, 2023
1 parent c2c5fc7 commit 6d43942
Show file tree
Hide file tree
Showing 10 changed files with 359 additions and 2 deletions.
88 changes: 87 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ tomli = { version = "^2.0.1", python = "<3.11" }
black = { version = "*", optional = true }

gitpython = { version = "^3.1.24", optional = true }
jinja2 = "^3.1.2"

[tool.poetry.group.dev.dependencies]
pre-commit = ">=2.20,<4.0"
Expand Down
53 changes: 52 additions & 1 deletion src/sqlfmt/actions.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import re
from typing import TYPE_CHECKING, Callable, List, Optional

from jinja2 import Environment
from jinja2.nodes import Const, Dict, Keyword, Pair

from sqlfmt.comment import Comment
from sqlfmt.exception import SqlfmtBracketError, StopRulesetLexing
from sqlfmt.line import Line
from sqlfmt.node import Node, get_previous_token
from sqlfmt.rule import MAYBE_WHITESPACES, Rule
from sqlfmt.rule import MAYBE_NESTED_DICTIONARY, MAYBE_WHITESPACES, Rule
from sqlfmt.token import Token, TokenType

if TYPE_CHECKING:
Expand Down Expand Up @@ -486,6 +489,44 @@ def handle_jinja(
raise StopRulesetLexing


def jinja_to_dict(obj: "Keyword | Dict | Pair | Const") -> object:
    """
    Recursively converts a Jinja AST node into plain Python data.

    Keyword, Dict, and Pair nodes become dictionaries whose values are
    converted recursively; a Const node is unwrapped to the value it holds.
    Any other node type yields None.

    Dict and Pair keys are read through `.key.value`, so this expects those
    keys to be Const nodes.

    The parameter annotation is quoted on purpose: an unquoted `A | B` union
    of classes is evaluated when the function is defined and raises a
    TypeError on Python versions older than 3.10.
    """
    if isinstance(obj, Keyword):
        # a Keyword's key is used as-is (no `.value` unwrapping)
        return {obj.key: jinja_to_dict(obj.value)}
    if isinstance(obj, Dict):
        return {pair.key.value: jinja_to_dict(pair.value) for pair in obj.items}
    if isinstance(obj, Pair):
        return {obj.key.value: jinja_to_dict(obj.value)}
    if isinstance(obj, Const):
        return obj.value
    # unsupported node type
    return None


def extract_nested_dictionary(source_string: str) -> Optional[str]:
    """
    Parses source_string as a Jinja template and returns the source text of
    the first dictionary literal that is passed as a keyword argument, e.g.
    the `{...}` in `config(lf_tags_config={...})`.

    The returned text starts at the opening `{` and runs through the first
    run of closing `}` characters that follows it.

    Returns None if no actual dictionary is matched, e.g. a false positive
    like a string followed by "}}, or if Jinja cannot parse source_string
    at all.
    """
    # local import: keeps this function self-contained without touching the
    # module's import block
    from jinja2 import TemplateSyntaxError

    try:
        parsed_template = Environment().parse(source_string)
    except TemplateSyntaxError:
        # without a syntax tree we cannot confirm that a dictionary is
        # present, so report "no dictionary" instead of aborting
        return None

    for outer_node in parsed_template.body:
        for node in outer_node.iter_child_nodes():
            # only nodes that carry keyword arguments are of interest
            for keyword in getattr(node, "kwargs", ()):
                if not isinstance(keyword.value, Dict):
                    continue
                # locate the dictionary's text in the raw source: the keyword
                # name (escaped, on a word boundary), an equals sign with
                # optional surrounding whitespace, then the braces
                dict_pattern = rf"\b{re.escape(keyword.key)}\s*=\s*(\{{.*?\}}+)"
                dict_result = re.search(dict_pattern, source_string, re.DOTALL)
                if dict_result:
                    return dict_result[1]
    return None


def handle_potentially_nested_tokens(
analyzer: "Analyzer",
source_string: str,
Expand All @@ -501,6 +542,16 @@ def handle_potentially_nested_tokens(
"""
start_rule = analyzer.get_rule(rule_name=start_name)
end_rule = analyzer.get_rule(rule_name=end_name)

# check if template contains a nested dictionary that can be
# misinterpreted as a Jinja expression ending
if re.search(MAYBE_NESTED_DICTIONARY, source_string):
if nested_dictionary := extract_nested_dictionary(source_string):
# use a negative lookbehind to exclude the nested dictionary as a match
# use an additional negative lookbehind to exclude } from match
nested_dict_pattern = rf"(?<!{nested_dictionary})".replace("}", "")
end_rule.pattern = nested_dict_pattern + r"(?<!})" + end_rule.pattern

# extract properties from matching start of token
pos, _ = match.span(0)
spos, epos = match.span(1)
Expand Down
1 change: 1 addition & 0 deletions src/sqlfmt/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from sqlfmt.analyzer import Analyzer

MAYBE_WHITESPACES: str = r"[^\S\n]*" # any whitespace except newline
# a double or single quote immediately followed by "}}"; this can be the end
# of a jinja expression, or the tail of a nested dictionary literal
MAYBE_NESTED_DICTIONARY: str = r"\"}}|'}}"


@dataclass
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- this could be a nested dictionary, since it contains `"}}`
{{"foo"}}
)))))__SQLFMT_OUTPUT__(((((
-- this could be a nested dictionary, since it contains `"}}`
{{ "foo" }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
-- source: https://github.com/tconbeer/sqlfmt/issues/471
{{
config(
materialized='incremental',
incremental_strategy='insert_overwrite',
external_location=generate_external_location("baz", "datalake/producer=foo/object=bar"),
partitioned_by=['batch_date'],
lf_tags_config={
'enabled': true,
'tags': {
'domain': 'foo',
'sensitivity': 'public'
}
}
)
}}
select
batch_date
from
{{ ref('bar') }}
{% if is_incremental() %}
where batch_date = '{{ var("ds") }}'
{% endif %}
)))))__SQLFMT_OUTPUT__(((((
-- source: https://github.com/tconbeer/sqlfmt/issues/471
{{
config(
materialized="incremental",
incremental_strategy="insert_overwrite",
external_location=generate_external_location("baz", "datalake/producer=foo/object=bar"),
partitioned_by=["batch_date"],
lf_tags_config={"enabled": true, "tags": {"domain": "foo", "sensitivity": "public"}},
)
}}
select batch_date
from {{ ref("bar") }}
{% if is_incremental() %} where batch_date = '{{ var("ds") }}' {% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{{
config(
materialized='incremental',
incremental_strategy='insert_overwrite',
external_location=generate_external_location('baz', 'datalake/producer=foo/object=bar'),
partitioned_by=['batch_date'],
lf_tags_config={
'enabled': true,
'tags': {
'nested_layer_0':
{'nested_layer_1':
{'nested_layer_2':{'foo':'bar'}}},
'domain': 'foo',
'sensitivity': 'public',
},
},
)
}}
select batch_date
from {{ ref("bar") }}
{% if is_incremental() %} where batch_date = '{{ var("ds") }}' {% endif %}
)))))__SQLFMT_OUTPUT__(((((
{{
config(
materialized="incremental",
incremental_strategy="insert_overwrite",
external_location=generate_external_location("baz", "datalake/producer=foo/object=bar"),
partitioned_by=["batch_date"],
lf_tags_config={
"enabled": true,
"tags": {
"nested_layer_0": {"nested_layer_1": {"nested_layer_2": {"foo": "bar"}}},
"domain": "foo",
"sensitivity": "public",
},
},
)
}}
select batch_date
from {{ ref("bar") }}
{% if is_incremental() %} where batch_date = '{{ var("ds") }}' {% endif %}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{{
config(
materialized='incremental',
incremental_strategy='insert_overwrite',
external_location=generate_external_location('baz', 'datalake/producer=foo/object=bar'),
partitioned_by=['batch_date'],
lf_tags_config={
'enabled': true,
'tags': {
'nested_layer_0':
{'nested_layer_1':
{'nested_layer_2':{'foo':'bar'}}},
'domain': 'foo',
'sensitivity': 'public',
},
},
)
}}
select batch_date
from {{ ref("bar") }}
{% if is_incremental() %} where batch_date = '{{ var("ds") }}' {% endif %}
)))))__SQLFMT_OUTPUT__(((((
{{
config(
materialized="incremental",
incremental_strategy="insert_overwrite",
external_location=generate_external_location(
"baz", "datalake/producer=foo/object=bar"
),
partitioned_by=["batch_date"],
lf_tags_config={
"enabled": true,
"tags": {
"nested_layer_0": {
"nested_layer_1": {"nested_layer_2": {"foo": "bar"}}
},
"domain": "foo",
"sensitivity": "public",
},
},
)
}}
select batch_date
from {{ ref("bar") }}
{% if is_incremental() %} where batch_date = '{{ var("ds") }}' {% endif %}
Loading

0 comments on commit 6d43942

Please sign in to comment.