Closed sgomezvillamor closed 3 years ago
Pasting the whole exception here:
..................................................
File "/usr/local/lib/python3.8/site-packages/datahub/ingestion/source/ge_data_profiler.py", line 222, in _handle_convert_column_evrs
192 def _handle_convert_column_evrs( # noqa: C901 (complexity)
193 self,
194 profile: DatasetProfileClass,
195 column: str,
196 col_evrs: Iterable[ExpectationValidationResult],
197 pretty_name: str,
198 ) -> None:
(...)
218 column_profile.uniqueProportion = res["observed_value"]
219 elif exp == "expect_column_values_to_not_be_null":
220 column_profile.nullCount = res["unexpected_count"]
221 if "unexpected_percent" in res:
--> 222 column_profile.nullProportion = res["unexpected_percent"] / 100
223 elif exp == "expect_column_values_to_not_match_regex":
..................................................
self = DatahubGEProfiler(data_context=<great_expectations.data_context.data_context.BaseDataContext object at 0x7fedc4491d90>,
report=SQLSourceReport(workunits_produced=177, workunit_ids=['dwh_sch.advertising.unsold', 'dwh_sch.advertising.sold', '
dwh_sch.advertising.advertising_budget_daily_csv', 'dwh_sch.advertising.advertising_budget_monthly_csv', 'dwh_sch.advert
ising.advertising_budget', 'dwh_sch.advertising.tmp_sold_distilled', 'dwh_sch.advertising.tmp_unsold_subito', 'dwh_sch.a
dvertising.tmp_unsold_distilled', 'dwh_sch.advertising.tmp_sold_subito', 'dwh_sch.advertising.tmp_unsold_willhaben', 'dw
h_sch.advertising.tmp_sold_willhaben', 'dwh_sch.advertising.unsold_1', 'dwh_sch.advertising.sold_1', 'dwh_sch.advertisin
g.adomik_distilled_sold', 'dwh_sch.advertising.adomik_subito_unsold', 'dwh_sch.advertising.adomik_distilled_unsold', 'dw
h_sch.advertising.adomik_willhaben_unsold', 'dwh_sch.advertising.adomik_subito_sold', 'dwh_sch.advertising.adomik_willha
ben_sold', 'dwh_sch.advertising.advertising_sales', 'dwh_sch.advertising.advertising_revenues', 'dwh_sch.advertising.adv
ertising_products', 'dwh_sch.advertising.advertising_impressions', 'profile-dwh_sch.advertising.unsold', 'profile-dwh_sc
...
profile = DatasetProfileClass({'timestampMillis': 1629375819592, 'rowCount': 0, 'columnCount': 9, 'fieldProfiles': [DatasetFieldPr
ofileClass({'fieldPath': 'sender_account_id', 'uniqueCount': 0, 'uniqueProportion': None, 'nullCount': 0, 'nullProportio
n': None, 'min': None, 'max': None, 'mean': None, 'median': None, 'stdev': None, 'quantiles': None, 'distinctValueFreque
ncies': None, 'histogram': None, 'sampleValues': None})]})
DatasetProfileClass = <class 'datahub.metadata.schema_classes.DatasetProfileClass'>
column = 'sender_account_id'
col_evrs = [
{
"expectation_config": {
"expectation_type": "expect_column_values_to_be_in_type_list",
"kwargs": {
"column": "sender_account_id",
"type_list": [
"CHAR",
"NCHAR",
"NTEXT",
"NVARCHAR",
"STRING",
"StringType",
"TEXT",
"VARCHAR",
"dtype('O')",
"object",
"str",
"string"
],
"result_format": "SUMMARY"
},
"meta": {
"BasicDatasetProfiler": {
"confidence": "very low"
}
}
},
"success": true,
"meta": {},
"exception_info": {
"raised_exception": false,
"exception_message": null,
"exception_traceback": null
},
"result": {
"observed_value": "VARCHAR"
}
},
{
"expectation_config": {
"expectation_type": "expect_column_unique_value_count_to_be_between",
"kwargs": {
"column": "sender_account_id",
"min_value": null,
"max_value": null,
"result_format": "SUMMARY"
},
"meta": {
"BasicDatasetProfiler": {
"confidence": "very low"
}
}
},
"success": true,
"meta": {},
"exception_info": {...
Iterable = typing.Iterable
ExpectationValidationResult = <class 'great_expectations.core.expectation_validation_result.ExpectationValidationResult'>
pretty_name = 'dwh_sch.ba.kufargo_adreplies'
column_profile.uniqueProportion = None
res = {'element_count': 0,
'unexpected_count': 0,
'unexpected_percent': None,
'unexpected_percent_total': None,
'partial_unexpected_list': []}
exp = 'expect_column_values_to_not_be_null'
column_profile.nullCount = 0
column_profile.nullProportion = None
..................................................
---- (full traceback above) ----
File "/usr/local/lib/python3.8/site-packages/datahub/entrypoints.py", line 91, in main
sys.exit(datahub(standalone_mode=False, **kwargs))
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 829, in __call__
return self.main(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 782, in main
rv = self.invoke(ctx)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1259, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1259, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1066, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 610, in invoke
return callback(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/datahub/cli/ingest_cli.py", line 58, in run
pipeline.run()
File "/usr/local/lib/python3.8/site-packages/datahub/ingestion/run/pipeline.py", line 108, in run
for wu in self.source.get_workunits():
File "/usr/local/lib/python3.8/site-packages/datahub/ingestion/source/sql/sql_common.py", line 318, in get_workunits
yield from self.loop_profiler(
File "/usr/local/lib/python3.8/site-packages/datahub/ingestion/source/sql/sql_common.py", line 513, in loop_profiler
profile = profiler.generate_profile(
File "/usr/local/lib/python3.8/site-packages/datahub/ingestion/source/ge_data_profiler.py", line 118, in generate_profile
profile = self._convert_evrs_to_profile(evrs, pretty_name=pretty_name)
File "/usr/local/lib/python3.8/site-packages/datahub/ingestion/source/ge_data_profiler.py", line 165, in _convert_evrs_to_profile
self._handle_convert_column_evrs(
File "/usr/local/lib/python3.8/site-packages/datahub/ingestion/source/ge_data_profiler.py", line 222, in _handle_convert_column_evrs
column_profile.nullProportion = res["unexpected_percent"] / 100
TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'
I can confirm that dwh_sch.ba.kufargo_adreplies
was empty when the profiling was executed.
Same as https://github.com/linkedin/datahub/issues/3130. I have also faced the same issue
Fixed in acryl-datahub>=0.8.10.2
Describe the bug
I’ve got the following error while running the
redshift
connector with profiling enabled.
To Reproduce
I just enabled
profiling
in the recipe of a redshift connector that was working fine. The profiling worked successfully for many tables, but failed for one table with the exception above.
Expected behavior
No failure.
Also, I was wondering whether a profiling failure on one table should make the whole recipe fail 🤔. Instead, the connector could continue with the next table.