Skip to content

Commit

Permalink
fix(ingest): tableau - omit schema fields when name is absent (#5275)
Browse files Browse the repository at this point in the history
  • Loading branch information
mayurinehate authored Jul 5, 2022
1 parent e93e469 commit f23c2c9
Show file tree
Hide file tree
Showing 4 changed files with 50,171 additions and 50,790 deletions.
29 changes: 18 additions & 11 deletions metadata-ingestion/src/datahub/ingestion/source/tableau.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def emit_workbooks(self) -> Iterable[MetadataWorkUnit]:
yield from self.emit_sheets_as_charts(workbook)
yield from self.emit_dashboards(workbook)
yield from self.emit_embedded_datasource(workbook)
yield from self.emit_upstream_tables()
yield from self.emit_upstream_tables()

def _track_custom_sql_ids(self, field: dict) -> None:
# Tableau shows custom sql datasource as a table in ColumnField.
Expand Down Expand Up @@ -551,28 +551,31 @@ def emit_custom_sql_datasources(self) -> Iterable[MetadataWorkUnit]:
def get_schema_metadata_for_custom_sql(
self, columns: List[dict]
) -> Optional[SchemaMetadata]:
fields = []
schema_metadata = None
for field in columns:
# Datasource fields
fields = []

if field.get("name") is None:
continue
nativeDataType = field.get("remoteType", "UNKNOWN")
TypeClass = FIELD_TYPE_MAPPING.get(nativeDataType, NullTypeClass)
schema_field = SchemaField(
fieldPath=field.get("name", ""),
fieldPath=field["name"],
type=SchemaFieldDataType(type=TypeClass()),
nativeDataType=nativeDataType,
description=field.get("description", ""),
)
fields.append(schema_field)

schema_metadata = SchemaMetadata(
schemaName="test",
platform=f"urn:li:dataPlatform:{self.platform}",
version=0,
fields=fields,
hash="",
platformSchema=OtherSchema(rawSchema=""),
)
schema_metadata = SchemaMetadata(
schemaName="test",
platform=f"urn:li:dataPlatform:{self.platform}",
version=0,
fields=fields,
hash="",
platformSchema=OtherSchema(rawSchema=""),
)
return schema_metadata

def _create_lineage_from_csql_datasource(
Expand Down Expand Up @@ -634,6 +637,8 @@ def _get_schema_metadata_for_datasource(
for field in datasource_fields:
# check datasource - custom sql relations from a field being referenced
self._track_custom_sql_ids(field)
if field.get("name") is None:
continue

nativeDataType = field.get("dataType", "UNKNOWN")
TypeClass = FIELD_TYPE_MAPPING.get(nativeDataType, NullTypeClass)
Expand Down Expand Up @@ -865,6 +870,8 @@ def emit_upstream_tables(self) -> Iterable[MetadataWorkUnit]:
if columns:
fields = []
for field in columns:
if field.get("name") is None:
continue
nativeDataType = field.get("remoteType", "UNKNOWN")
TypeClass = FIELD_TYPE_MAPPING.get(nativeDataType, NullTypeClass)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1522,6 +1522,24 @@
}
]
},
{
"__typename": "ColumnField",
"id": "xxxdc8ee-30e5-644b-cf76-48a3dea79cda",
"name": null,
"description": null,
"isHidden": false,
"folderName": null,
"dataCategory": "NOMINAL",
"role": "DIMENSION",
"dataType": "STRING",
"defaultFormat": null,
"aggregation": null,
"columns": [
{
"table": {}
}
]
},
{
"__typename": "ColumnField",
"id": "277dc8ee-30e5-644b-cf76-48a3dea79cda",
Expand Down
Loading

0 comments on commit f23c2c9

Please sign in to comment.