Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ingest/lookml): support view inheritance for fields #11148

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
IMPORTED_PROJECTS = "imported_projects"
DIMENSIONS = "dimensions"
MEASURES = "measures"
DIMENSION_GROUPS = "dimension_groups"
SQL_TABLE_NAME = "sql_table_name"
DATAHUB_TRANSFORMED_SQL_TABLE_NAME = "datahub_transformed_sql_table_name"
DERIVED_TABLE = "derived_table"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
find_view_from_resolved_includes,
)
from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition
from datahub.ingestion.source.looker.looker_constant import (
DIMENSION_GROUPS,
DIMENSIONS,
MEASURES,
)
from datahub.ingestion.source.looker.looker_dataclasses import LookerViewFile
from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader
from datahub.ingestion.source.looker.lookml_config import (
Expand All @@ -23,6 +28,39 @@
logger = logging.getLogger(__name__)


def merge_parent_and_child_fields(
    child_fields: List[dict], parent_fields: List[dict]
) -> List[Dict]:
    """Combine a child view's fields with the fields of the view it extends.

    Fields are matched on their ``NAME`` key. When the child defines a field
    with the same name as a parent field, the child's definition wins and the
    parent's is dropped; every other parent field is placed after the child's
    own fields. This follows the precedence rules in the LookML documentation:
    https://cloud.google.com/looker/docs/reference/param-view-extends.

    Args:
        child_fields: Field definitions declared on the child view.
        parent_fields: Field definitions taken from the view named in
            ``view.extends``.

    Returns:
        A new list holding the child fields in their original order, followed
        by the parent fields the child does not redefine. Neither input list
        is modified.

    Raises:
        AssertionError: If any field dict has no ``NAME`` key.
    """
    # Only the names are needed to detect overrides, so a set is enough.
    child_field_names = set()
    for field in child_fields:
        # name is a required key of every entry in a lookml field array
        assert NAME in field, "A lookml field must have a name"
        child_field_names.add(field[NAME])

    # Build the result on a copy so the caller's list is left untouched.
    merged_fields: List[Dict] = list(child_fields)

    for field in parent_fields:
        # name is a required key of every entry in a lookml field array
        assert NAME in field, "A lookml field must have a name"

        if field[NAME] in child_field_names:
            # Fields defined in the child view take higher precedence.
            # This is an override case where the child has redefined the parent field.
            # There are some additive attributes; however, we are not consuming them
            # in metadata ingestion and hence not adding them to the child field.
            continue

        merged_fields.append(field)

    return merged_fields


class LookerFieldContext:
raw_field: Dict[Any, Any]

Expand Down Expand Up @@ -248,23 +286,21 @@ def resolve_extends_view_name(
)
return None

def get_including_extends(
def _get_parent_attribute(
self,
field: str,
attribute_name: str,
) -> Optional[Any]:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a comment for this method?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

"""
Search for the attribute_name in the parent views of the current view and return its value.
"""
extends = list(
itertools.chain.from_iterable(
self.raw_view.get("extends", self.raw_view.get("extends__all", []))
)
)

# First, check the current view.
if field in self.raw_view:
return self.raw_view[field]

# The field might be defined in another view and this view is extending that view,
# so we resolve this field while taking that into account.
# following Looker's precedence rules.
# Following Looker's precedence rules.
# Iterate over the view names listed in the `extends` attribute in reverse order.
for extend in reversed(extends):
assert extend != self.raw_view[NAME], "a view cannot extend itself"
extend_view = self.resolve_extends_view_name(
Expand All @@ -275,8 +311,33 @@ def get_including_extends(
f"failed to resolve extends view {extend} in view {self.raw_view[NAME]} of"
f" file {self.view_file.absolute_file_path}"
)
if field in extend_view:
return extend_view[field]
if attribute_name in extend_view:
return extend_view[attribute_name]

return None

def get_including_extends(
self,
field: str,
) -> Optional[Any]:

# According to Looker's inheritance rules, we need to merge the fields (i.e., dimensions, measures, and
# dimension_groups) from both the child and the parent.
if field in [DIMENSIONS, DIMENSION_GROUPS, MEASURES]:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a comment about why this is the case - certain fields are merged by extends, whereas others are overridden

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

# Get the child fields
child_fields = self._get_list_dict(field)
# merge parent and child fields
return merge_parent_and_child_fields(
child_fields=child_fields,
parent_fields=self._get_parent_attribute(attribute_name=field) or [],
)
else:
# Return the field from the current view if it exists.
if field in self.raw_view:
return self.raw_view[field]

# The field might be defined in a parent view that this view extends, so fall back to the parent views.
return self._get_parent_attribute(field)

return None

Expand Down Expand Up @@ -383,13 +444,13 @@ def _get_list_dict(self, attribute_name: str) -> List[Dict]:
return []

def dimensions(self) -> List[Dict]:
return self._get_list_dict("dimensions")
return self.get_including_extends(field=DIMENSIONS) or []

def measures(self) -> List[Dict]:
return self._get_list_dict("measures")
return self.get_including_extends(field=MEASURES) or []

def dimension_groups(self) -> List[Dict]:
return self._get_list_dict("dimension_groups")
return self.get_including_extends(field=DIMENSION_GROUPS) or []

def is_materialized_derived_view(self) -> bool:
for k in self.derived_table():
Expand Down Expand Up @@ -433,7 +494,7 @@ def is_sql_based_derived_case(self) -> bool:
return False

def is_native_derived_case(self) -> bool:
# It is pattern 5
# It is pattern 5, mentioned in Class documentation
if (
"derived_table" in self.raw_view
and "explore_source" in self.raw_view["derived_table"]
Expand All @@ -443,7 +504,7 @@ def is_native_derived_case(self) -> bool:
return False

def is_sql_based_derived_view_without_fields_case(self) -> bool:
# Pattern 6
# Pattern 6, mentioned in Class documentation
fields: List[Dict] = []

fields.extend(self.dimensions())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -485,9 +485,195 @@
"dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD)",
"type": "VIEW"
}
],
"fineGrainedLineages": [
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),name)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD),name)"
],
"confidenceScore": 1.0
},
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),date)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD),date)"
],
"confidenceScore": 1.0
},
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),date)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD),issue_date)"
],
"confidenceScore": 1.0
},
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),date)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD),issue_date_3)"
],
"confidenceScore": 1.0
},
{
"upstreamType": "FIELD_SET",
"upstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),count)"
],
"downstreamType": "FIELD",
"downstreams": [
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD),count)"
],
"confidenceScore": 1.0
}
]
}
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "extend_book",
"platform": "urn:li:dataPlatform:looker",
"version": 0,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown"
},
"hash": "",
"platformSchema": {
"com.linkedin.pegasus2avro.schema.OtherSchema": {
"rawSchema": ""
}
},
"fields": [
{
"fieldPath": "name",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": false
},
{
"fieldPath": "date",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": false
},
{
"fieldPath": "issue_date",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"nativeDataType": "number",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": false
},
{
"fieldPath": "issue_date_3",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"nativeDataType": "number",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Dimension"
}
]
},
"isPartOfKey": false
},
{
"fieldPath": "count",
"nullable": false,
"description": "",
"label": "",
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.NumberType": {}
}
},
"nativeDataType": "count",
"recursive": false,
"globalTags": {
"tags": [
{
"tag": "urn:li:tag:Measure"
}
]
},
"isPartOfKey": false
}
],
"primaryKeys": []
}
},
{
"com.linkedin.pegasus2avro.dataset.DatasetProperties": {
"customProperties": {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
include: "parent_view.view.lkml"

view: child_view {
extends: [parent_view]

dimension: id {
primary_key: yes
type: integer
sql: ${TABLE}.id ;;
}

dimension: child_dimension_1 {
type: string
sql: ${TABLE}.child_dimension_1 ;;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ include: "employee_salary_rating.view.lkml"
include: "environment_activity_logs.view.lkml"
include: "employee_income_source_as_per_env.view.lkml"
include: "rent_as_employee_income_source.view.lkml"
include: "child_view.view.lkml"

explore: activity_logs {
}
Expand All @@ -35,4 +36,7 @@ explore: employee_income_source_as_per_env {
}

explore: rent_as_employee_income_source {
}

explore: child_view {
}
Loading
Loading