Hi! I’ve got the following error while running `re...
# ingestion
w
Hi! I’ve got the following error while running the `redshift` connector with profiling enabled.
Copy code
File "/usr/local/lib/python3.8/site-packages/datahub/ingestion/source/ge_data_profiler.py", line 222, in _handle_convert_column_evrs
    column_profile.nullProportion = res["unexpected_percent"] / 100

TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'
This seems to be a bug, so I’m sharing it here. I could create an issue on GitHub if you prefer.
More details. Hope it helps.
Copy code
..................................................

File "/usr/local/lib/python3.8/site-packages/datahub/ingestion/source/ge_data_profiler.py", line 222, in _handle_convert_column_evrs
    192  def _handle_convert_column_evrs(  # noqa: C901 (complexity)
    193      self,
    194      profile: DatasetProfileClass,
    195      column: str,
    196      col_evrs: Iterable[ExpectationValidationResult],
    197      pretty_name: str,
    198  ) -> None:
 (...)
    218              column_profile.uniqueProportion = res["observed_value"]
    219          elif exp == "expect_column_values_to_not_be_null":
    220              column_profile.nullCount = res["unexpected_count"]
    221              if "unexpected_percent" in res:
--> 222                  column_profile.nullProportion = res["unexpected_percent"] / 100
    223          elif exp == "expect_column_values_to_not_match_regex":
    ..................................................
     self = DatahubGEProfiler(data_context=<great_expectations.data_context.data_context.BaseDataContext object at 0x7fedc4491d90>, 
             report=SQLSourceReport(workunits_produced=177, workunit_ids=['dwh_sch.advertising.unsold', 'dwh_sch.advertising.sold', '
             dwh_sch.advertising.advertising_budget_daily_csv', 'dwh_sch.advertising.advertising_budget_monthly_csv', 'dwh_sch.advert
             ising.advertising_budget', 'dwh_sch.advertising.tmp_sold_distilled', 'dwh_sch.advertising.tmp_unsold_subito', 'dwh_sch.a
             dvertising.tmp_unsold_distilled', 'dwh_sch.advertising.tmp_sold_subito', 'dwh_sch.advertising.tmp_unsold_willhaben', 'dw
             h_sch.advertising.tmp_sold_willhaben', 'dwh_sch.advertising.unsold_1', 'dwh_sch.advertising.sold_1', 'dwh_sch.advertisin
             g.adomik_distilled_sold', 'dwh_sch.advertising.adomik_subito_unsold', 'dwh_sch.advertising.adomik_distilled_unsold', 'dw
             h_sch.advertising.adomik_willhaben_unsold', 'dwh_sch.advertising.adomik_subito_sold', 'dwh_sch.advertising.adomik_willha
             ben_sold', 'dwh_sch.advertising.advertising_sales', 'dwh_sch.advertising.advertising_revenues', 'dwh_sch.advertising.adv
             ertising_products', 'dwh_sch.advertising.advertising_impressions', 'profile-dwh_sch.advertising.unsold', 'profile-dwh_sc
             ...
     profile = DatasetProfileClass({'timestampMillis': 1629375819592, 'rowCount': 0, 'columnCount': 9, 'fieldProfiles': [DatasetFieldPr
                ofileClass({'fieldPath': 'sender_account_id', 'uniqueCount': 0, 'uniqueProportion': None, 'nullCount': 0, 'nullProportio
                n': None, 'min': None, 'max': None, 'mean': None, 'median': None, 'stdev': None, 'quantiles': None, 'distinctValueFreque
                ncies': None, 'histogram': None, 'sampleValues': None})]})
     DatasetProfileClass = <class 'datahub.metadata.schema_classes.DatasetProfileClass'>
     column = 'sender_account_id'
     col_evrs = [
                 {
                   "expectation_config": {
                     "expectation_type": "expect_column_values_to_be_in_type_list",
                     "kwargs": {
                       "column": "sender_account_id",
                       "type_list": [
                         "CHAR",
                         "NCHAR",
                         "NTEXT",
                         "NVARCHAR",
                         "STRING",
                         "StringType",
                         "TEXT",
                         "VARCHAR",
                         "dtype('O')",
                         "object",
                         "str",
                         "string"
                       ],
                       "result_format": "SUMMARY"
                     },
                     "meta": {
                       "BasicDatasetProfiler": {
                         "confidence": "very low"
                       }
                     }
                   },
                   "success": true,
                   "meta": {},
                   "exception_info": {
                     "raised_exception": false,
                     "exception_message": null,
                     "exception_traceback": null
                   },
                   "result": {
                     "observed_value": "VARCHAR"
                   }
                 }, 
                 {
                   "expectation_config": {
                     "expectation_type": "expect_column_unique_value_count_to_be_between",
                     "kwargs": {
                       "column": "sender_account_id",
                       "min_value": null,
                       "max_value": null,
                       "result_format": "SUMMARY"
                     },
                     "meta": {
                       "BasicDatasetProfiler": {
                         "confidence": "very low"
                       }
                     }
                   },
                   "success": true,
                   "meta": {},
                   "exception_info": {...
     Iterable = typing.Iterable
     ExpectationValidationResult = <class 'great_expectations.core.expectation_validation_result.ExpectationValidationResult'>
     pretty_name = 'dwh_sch.ba.kufargo_adreplies'
     column_profile.uniqueProportion = None
     res = {'element_count': 0,
            'unexpected_count': 0,
            'unexpected_percent': None,
            'unexpected_percent_total': None,
            'partial_unexpected_list': []}
     exp = 'expect_column_values_to_not_be_null'
     column_profile.nullCount = 0
     column_profile.nullProportion = None
    ..................................................

---- (full traceback above) ----
File "/usr/local/lib/python3.8/site-packages/datahub/entrypoints.py", line 91, in main
    sys.exit(datahub(standalone_mode=False, **kwargs))
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 829, in __call__
    return self.main(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 782, in main
    rv = self.invoke(ctx)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1259, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1259, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1066, in invoke
    return ctx.invoke(self.callback, **ctx.params)
File "/usr/local/lib/python3.8/site-packages/click/core.py", line 610, in invoke
    return callback(*args, **kwargs)
File "/usr/local/lib/python3.8/site-packages/datahub/cli/ingest_cli.py", line 58, in run
    pipeline.run()
File "/usr/local/lib/python3.8/site-packages/datahub/ingestion/run/pipeline.py", line 108, in run
    for wu in self.source.get_workunits():
File "/usr/local/lib/python3.8/site-packages/datahub/ingestion/source/sql/sql_common.py", line 318, in get_workunits
    yield from self.loop_profiler(
File "/usr/local/lib/python3.8/site-packages/datahub/ingestion/source/sql/sql_common.py", line 513, in loop_profiler
    profile = profiler.generate_profile(
File "/usr/local/lib/python3.8/site-packages/datahub/ingestion/source/ge_data_profiler.py", line 118, in generate_profile
    profile = self._convert_evrs_to_profile(evrs, pretty_name=pretty_name)
File "/usr/local/lib/python3.8/site-packages/datahub/ingestion/source/ge_data_profiler.py", line 165, in _convert_evrs_to_profile
    self._handle_convert_column_evrs(
File "/usr/local/lib/python3.8/site-packages/datahub/ingestion/source/ge_data_profiler.py", line 222, in _handle_convert_column_evrs
    column_profile.nullProportion = res["unexpected_percent"] / 100

TypeError: unsupported operand type(s) for /: 'NoneType' and 'int'
m
@witty-butcher-82399 an issue would be great!
c
I’m having the same issue
c
Is the table empty?
b
This one is a simple fix. Each of these parameters should be handled more defensively for cases where GE returns nothing.
w
I can confirm that table was empty when the profiling was executed.
b
Thank you Sergio! Would you be interested in taking the PR for this?
w
m
thanks @witty-butcher-82399!
@witty-butcher-82399: dropped in a suggestion, can you check if that works with your test? (https://github.com/linkedin/datahub/pull/3144/files#r694093901)
w
Thanks @mammoth-bear-12532 for your feedback in the PR. I have just updated it.
m
thanks @witty-butcher-82399 will merge once CI passes 🙏
w
👍
m
@witty-butcher-82399: this should be available through pip at version 0.8.10.2 now.
👌 1
w