Skip to content

Commit

Permalink
fix(ingest): improve auto_materialize_referenced_tags_terms error handling (#10906)
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 authored Jul 13, 2024
1 parent a4bce6a commit ea7d6a9
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 8 deletions.
5 changes: 3 additions & 2 deletions metadata-ingestion/scripts/avro_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ def generate_urn_class(entity_type: str, key_aspect: dict) -> str:
init_coercion = ""
init_validation = ""
for field in fields:
init_validation += f'if not {field_name(field)}:\n raise InvalidUrnError("{field_name(field)} cannot be empty")\n'
init_validation += f'if not {field_name(field)}:\n raise InvalidUrnError("{class_name} {field_name(field)} cannot be empty")\n'

# Generalized mechanism for validating embedded urns.
field_urn_type_class = None
Expand All @@ -600,7 +600,8 @@ def generate_urn_class(entity_type: str, key_aspect: dict) -> str:
)
else:
init_validation += (
f"assert not UrnEncoder.contains_reserved_char({field_name(field)})\n"
f"if UrnEncoder.contains_reserved_char({field_name(field)}):\n"
f" raise InvalidUrnError(f'{class_name} {field_name(field)} contains reserved characters')\n"
)

if field_name(field) == "env":
Expand Down
18 changes: 12 additions & 6 deletions metadata-ingestion/src/datahub/ingestion/api/source_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from datahub.metadata.urns import DatasetUrn, GlossaryTermUrn, TagUrn, Urn
from datahub.specific.dataset import DatasetPatchBuilder
from datahub.telemetry import telemetry
from datahub.utilities.urns.error import InvalidUrnError
from datahub.utilities.urns.urn import guess_entity_type
from datahub.utilities.urns.urn_iter import list_urns, lowercase_dataset_urns

Expand Down Expand Up @@ -172,13 +173,18 @@ def auto_materialize_referenced_tags_terms(
yield wu

for urn in sorted(referenced_tags - tags_with_aspects):
urn_tp = Urn.from_string(urn)
assert isinstance(urn_tp, (TagUrn, GlossaryTermUrn))
try:
urn_tp = Urn.from_string(urn)
assert isinstance(urn_tp, (TagUrn, GlossaryTermUrn))

yield MetadataChangeProposalWrapper(
entityUrn=urn,
aspect=urn_tp.to_key_aspect(),
).as_workunit()
yield MetadataChangeProposalWrapper(
entityUrn=urn,
aspect=urn_tp.to_key_aspect(),
).as_workunit()
except InvalidUrnError:
logger.info(
f"Source produced an invalid urn, so no key aspect will be generated: {urn}"
)


def auto_lowercase_urns(
Expand Down

0 comments on commit ea7d6a9

Please sign in to comment.