Skip to content

Commit 4811de1

Browse files
haeniya and Yanik Häni
authored
feat(ingestion/tableau): hidden asset handling (#11559)
Co-authored-by: Yanik Häni <[email protected]>
1 parent 0e7ebaf commit 4811de1

15 files changed

+114839
-77
lines changed

metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py

Lines changed: 69 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -485,6 +485,18 @@ class TableauConfig(
485485
description="Configuration settings for ingesting Tableau groups and their capabilities as custom properties.",
486486
)
487487

488+
ingest_hidden_assets: bool = Field(
489+
True,
490+
description="When enabled, hidden views and dashboards are ingested into Datahub. "
491+
"If a dashboard or view is hidden in Tableau the luid is blank. Default of this config field is True.",
492+
)
493+
494+
tags_for_hidden_assets: List[str] = Field(
495+
default=[],
496+
description="Tags to be added to hidden dashboards and views. If a dashboard or view is hidden in Tableau the luid is blank. "
497+
"This can only be used with ingest_tags enabled as it will overwrite tags entered from the UI.",
498+
)
499+
488500
# pre=True because we need to make some decisions before pydantic initializes the configuration with default values
489501
@root_validator(pre=True)
490502
def projects_backward_compatibility(cls, values: Dict) -> Dict:
@@ -510,6 +522,20 @@ def projects_backward_compatibility(cls, values: Dict) -> Dict:
510522

511523
return values
512524

525+
@root_validator()
526+
def validate_config_values(cls, values: Dict) -> Dict:
527+
tags_for_hidden_assets = values.get("tags_for_hidden_assets")
528+
ingest_tags = values.get("ingest_tags")
529+
if (
530+
not ingest_tags
531+
and tags_for_hidden_assets
532+
and len(tags_for_hidden_assets) > 0
533+
):
534+
raise ValueError(
535+
"tags_for_hidden_assets is only allowed with ingest_tags enabled. Be aware that this will overwrite tags entered from the UI."
536+
)
537+
return values
538+
513539

514540
class WorkbookKey(ContainerKey):
515541
workbook_id: str
@@ -605,6 +631,7 @@ class TableauSourceReport(StaleEntityRemovalSourceReport):
605631
num_upstream_table_failed_generate_reference: int = 0
606632
num_upstream_table_lineage_failed_parse_sql: int = 0
607633
num_upstream_fine_grained_lineage_failed_parse_sql: int = 0
634+
num_hidden_assets_skipped: int = 0
608635

609636

610637
@platform_name("Tableau")
@@ -1051,6 +1078,11 @@ def get_data_platform_instance(self) -> DataPlatformInstanceClass:
10511078
),
10521079
)
10531080

1081+
def _is_hidden_view(self, dashboard_or_view: Dict) -> bool:
1082+
# LUID is blank if the view is hidden in the workbook.
1083+
# More info here: https://help.tableau.com/current/api/metadata_api/en-us/reference/view.doc.html
1084+
return not dashboard_or_view.get(c.LUID)
1085+
10541086
def get_connection_object_page(
10551087
self,
10561088
query: str,
@@ -2296,12 +2328,11 @@ def emit_datasource(
22962328
)
22972329

22982330
# Tags
2299-
if datasource_info:
2331+
if datasource_info and self.config.ingest_tags:
23002332
tags = self.get_tags(datasource_info)
2301-
if tags:
2302-
dataset_snapshot.aspects.append(
2303-
builder.make_global_tag_aspect_with_tag_list(tags)
2304-
)
2333+
dataset_snapshot.aspects.append(
2334+
builder.make_global_tag_aspect_with_tag_list(tags)
2335+
)
23052336

23062337
# Browse path
23072338
if browse_path and is_embedded_ds and workbook and workbook.get(c.NAME):
@@ -2692,7 +2723,13 @@ def emit_sheets(self) -> Iterable[MetadataWorkUnit]:
26922723
c.SHEETS_CONNECTION,
26932724
sheets_filter,
26942725
):
2695-
yield from self.emit_sheets_as_charts(sheet, sheet.get(c.WORKBOOK))
2726+
if self.config.ingest_hidden_assets or not self._is_hidden_view(sheet):
2727+
yield from self.emit_sheets_as_charts(sheet, sheet.get(c.WORKBOOK))
2728+
else:
2729+
self.report.num_hidden_assets_skipped += 1
2730+
logger.debug(
2731+
f"Skip view {sheet.get(c.ID)} because it's hidden (luid is blank)."
2732+
)
26962733

26972734
def emit_sheets_as_charts(
26982735
self, sheet: dict, workbook: Optional[Dict]
@@ -2783,11 +2820,17 @@ def emit_sheets_as_charts(
27832820
chart_snapshot.aspects.append(owner)
27842821

27852822
# Tags
2786-
tags = self.get_tags(sheet)
2787-
if tags:
2823+
if self.config.ingest_tags:
2824+
tags = self.get_tags(sheet)
2825+
if len(self.config.tags_for_hidden_assets) > 0 and self._is_hidden_view(
2826+
sheet
2827+
):
2828+
tags.extend(self.config.tags_for_hidden_assets)
2829+
27882830
chart_snapshot.aspects.append(
27892831
builder.make_global_tag_aspect_with_tag_list(tags)
27902832
)
2833+
27912834
yield self.get_metadata_change_event(chart_snapshot)
27922835
if sheet_external_url is not None and self.config.ingest_embed_url is True:
27932836
yield self.new_work_unit(
@@ -2869,7 +2912,7 @@ def emit_workbook_as_container(self, workbook: Dict) -> Iterable[MetadataWorkUni
28692912
else None
28702913
)
28712914

2872-
tags = self.get_tags(workbook)
2915+
tags = self.get_tags(workbook) if self.config.ingest_tags else None
28732916

28742917
parent_key = None
28752918
project_luid: Optional[str] = self._get_workbook_project_luid(workbook)
@@ -3000,17 +3043,23 @@ def emit_dashboards(self) -> Iterable[MetadataWorkUnit]:
30003043
c.DASHBOARDS_CONNECTION,
30013044
dashboards_filter,
30023045
):
3003-
yield from self.emit_dashboard(dashboard, dashboard.get(c.WORKBOOK))
3046+
if self.config.ingest_hidden_assets or not self._is_hidden_view(dashboard):
3047+
yield from self.emit_dashboard(dashboard, dashboard.get(c.WORKBOOK))
3048+
else:
3049+
self.report.num_hidden_assets_skipped += 1
3050+
logger.debug(
3051+
f"Skip dashboard {dashboard.get(c.ID)} because it's hidden (luid is blank)."
3052+
)
30043053

3005-
def get_tags(self, obj: dict) -> Optional[List[str]]:
3054+
def get_tags(self, obj: dict) -> List[str]:
30063055
tag_list = obj.get(c.TAGS, [])
3007-
if tag_list and self.config.ingest_tags:
3056+
if tag_list:
30083057
tag_list_str = [
30093058
t[c.NAME] for t in tag_list if t is not None and t.get(c.NAME)
30103059
]
30113060

30123061
return tag_list_str
3013-
return None
3062+
return []
30143063

30153064
def emit_dashboard(
30163065
self, dashboard: dict, workbook: Optional[Dict]
@@ -3061,8 +3110,13 @@ def emit_dashboard(
30613110
)
30623111
dashboard_snapshot.aspects.append(dashboard_info_class)
30633112

3064-
tags = self.get_tags(dashboard)
3065-
if tags:
3113+
if self.config.ingest_tags:
3114+
tags = self.get_tags(dashboard)
3115+
if len(self.config.tags_for_hidden_assets) > 0 and self._is_hidden_view(
3116+
dashboard
3117+
):
3118+
tags.extend(self.config.tags_for_hidden_assets)
3119+
30663120
dashboard_snapshot.aspects.append(
30673121
builder.make_global_tag_aspect_with_tag_list(tags)
30683122
)

0 commit comments

Comments
 (0)