57
57
convert_to_cardinality ,
58
58
)
59
59
from datahub .ingestion .source .sql .sql_report import SQLSourceReport
60
- from datahub .metadata .com .linkedin .pegasus2avro .schema import EditableSchemaMetadata
60
+ from datahub .ingestion .source .sql .sql_types import resolve_sql_type
61
+ from datahub .metadata .com .linkedin .pegasus2avro .schema import (
62
+ EditableSchemaMetadata ,
63
+ NumberType ,
64
+ )
61
65
from datahub .metadata .schema_classes import (
62
66
DatasetFieldProfileClass ,
63
67
DatasetProfileClass ,
@@ -361,6 +365,8 @@ class _SingleDatasetProfiler(BasicDatasetProfilerBase):
361
365
platform : str
362
366
env : str
363
367
368
+ column_types : Dict [str , str ] = dataclasses .field (default_factory = dict )
369
+
364
370
def _get_columns_to_profile (self ) -> List [str ]:
365
371
if not self .config .any_field_level_metrics_enabled ():
366
372
return []
@@ -374,6 +380,7 @@ def _get_columns_to_profile(self) -> List[str]:
374
380
375
381
for col_dict in self .dataset .columns :
376
382
col = col_dict ["name" ]
383
+ self .column_types [col ] = str (col_dict ["type" ])
377
384
# We expect the allow/deny patterns to specify '<table_pattern>.<column_pattern>'
378
385
if not self .config ._allow_deny_patterns .allowed (
379
386
f"{ self .dataset_name } .{ col } "
@@ -430,6 +437,21 @@ def _get_column_type(self, column_spec: _SingleColumnSpec, column: str) -> None:
430
437
self .dataset , column
431
438
)
432
439
440
+ if column_spec .type_ == ProfilerDataType .UNKNOWN :
441
+ try :
442
+ datahub_field_type = resolve_sql_type (
443
+ self .column_types [column ], self .dataset .engine .dialect .name .lower ()
444
+ )
445
+ except Exception as e :
446
+ logger .debug (
447
+ f"Error resolving sql type { self .column_types [column ]} : { e } "
448
+ )
449
+ datahub_field_type = None
450
+ if datahub_field_type is None :
451
+ return
452
+ if isinstance (datahub_field_type , NumberType ):
453
+ column_spec .type_ = ProfilerDataType .NUMERIC
454
+
433
455
@_run_with_query_combiner
434
456
def _get_column_cardinality (
435
457
self , column_spec : _SingleColumnSpec , column : str
0 commit comments