enough-zoo-71516
11/11/2021, 2:05 PM
enough-zoo-71516
11/11/2021, 2:07 PM
"profiling": {
"enabled": True,
"profile_pattern": {
"deny": [".*_2020.*", ".*_2021.*", ".*_2022.*"],
"allow": ["metrics.latest"],
}
}
to just profile the metrics.latest
table. But I got this error:
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/airflow/task/task_runner/standard_task_runner.py", line 85, in _start_by_fork
args.func(args, dag=self.dag)
File "/usr/local/lib/python3.9/site-packages/airflow/cli/cli_parser.py", line 48, in command
return func(*args, **kwargs)
File "/usr/local/lib/python3.9/site-packages/airflow/utils/cli.py", line 92, in wrapper
return f(*args, **kwargs)
File "/usr/local/lib/python3.9/site-packages/airflow/cli/commands/task_command.py", line 292, in task_run
_run_task_by_selected_method(args, dag, ti)
File "/usr/local/lib/python3.9/site-packages/airflow/cli/commands/task_command.py", line 107, in _run_task_by_selected_method
_run_raw_task(args, ti)
File "/usr/local/lib/python3.9/site-packages/airflow/cli/commands/task_command.py", line 180, in _run_raw_task
ti._run_raw_task(
File "/usr/local/lib/python3.9/site-packages/airflow/utils/session.py", line 70, in wrapper
return func(*args, session=session, **kwargs)
File "/usr/local/lib/python3.9/site-packages/airflow/models/taskinstance.py", line 1332, in _run_raw_task
self._execute_task_with_callbacks(context)
File "/usr/local/lib/python3.9/site-packages/airflow/models/taskinstance.py", line 1458, in _execute_task_with_callbacks
result = self._execute_task(context, self.task)
File "/usr/local/lib/python3.9/site-packages/airflow/models/taskinstance.py", line 1514, in _execute_task
result = execute_callable(context=context)
File "/usr/local/lib/python3.9/site-packages/airflow/operators/python.py", line 151, in execute
return_value = self.execute_callable()
File "/usr/local/lib/python3.9/site-packages/airflow/operators/python.py", line 162, in execute_callable
return self.python_callable(*self.op_args, **self.op_kwargs)
File "/usr/local/airflow/dags/datahub.bigquery.pipeline/datahub.bigquery.pipeline.py", line 40, in ingest_from_bigquery
pipeline = Pipeline.create(
File "/usr/local/lib/python3.9/site-packages/datahub/ingestion/run/pipeline.py", line 136, in create
return cls(config)
File "/usr/local/lib/python3.9/site-packages/datahub/ingestion/run/pipeline.py", line 104, in __init__
self.source: Source = source_class.create(
File "/usr/local/lib/python3.9/site-packages/datahub/ingestion/source/sql/bigquery.py", line 196, in create
config = BigQueryConfig.parse_obj(config_dict)
File "pydantic/main.py", line 578, in pydantic.main.BaseModel.parse_obj
File "pydantic/main.py", line 406, in pydantic.main.BaseModel.__init__
pydantic.error_wrappers.ValidationError: 1 validation error for BigQueryConfig
profiling -> profile_pattern
extra fields not permitted (type=value_error.extra)
loud-camera-71352
11/11/2021, 4:16 PM
profile_pattern:
deny: [".*_2020.*", ".*_2021.*", ".*_2022.*"]
allow: ["metrics.latest"]
profiling:
enabled: true
big-carpet-38439
11/11/2021, 4:29 PM
enough-zoo-71516
11/11/2021, 4:30 PM
enough-zoo-71516
11/11/2021, 7:38 PM
Traceback (most recent call last):
File "/usr/local/lib/python3.9/site-packages/datahub/ingestion/source/ge_data_profiler.py", line 551, in generate_profile
batch = self._get_ge_dataset(
File "/usr/local/lib/python3.9/site-packages/datahub/ingestion/source/ge_data_profiler.py", line 607, in _get_ge_dataset
batch = ge_context.data_context.get_batch(
File "/usr/local/lib/python3.9/site-packages/great_expectations/data_context/data_context.py", line 1551, in get_batch
return self._get_batch_v2(
File "/usr/local/lib/python3.9/site-packages/great_expectations/data_context/data_context.py", line 1262, in _get_batch_v2
return validator.get_dataset()
File "/usr/local/lib/python3.9/site-packages/great_expectations/validator/validator.py", line 1784, in get_dataset
return self.expectation_engine(
File "/usr/local/lib/python3.9/site-packages/great_expectations/dataset/sqlalchemy_dataset.py", line 609, in __init__
raise ValueError(
ValueError: No BigQuery dataset specified. Use bigquery_temp_table batch_kwarg or a specify a default dataset in engine url
enough-zoo-71516
11/11/2021, 7:39 PM
enough-zoo-71516
11/11/2021, 7:41 PM
{
"source": {
"type": "bigquery",
"config": {
"project_id": "***",
"env": "DEV",
"schema_pattern": {
"allow": [
"tmp", "data360", "posthog", "metrics"
],
},
"table_pattern": {
"deny": [".*_2020.*", ".*_2021.*", ".*_2022.*"],
},
"include_table_lineage": True,
"include_tables": True,
"include_views": True,
"profile_pattern": {
"deny": [".*_2020.*", ".*_2021.*", ".*_2022.*"],
"allow": ["metrics_latest"],
},
"profiling": {
"enabled": True,
"limit": 10,
}
},
},
"sink": {
"type": "datahub-rest",
"config": {"server": "https://***/gms", "token": datahub_api_key},
},
}
)
helpful-optician-78938
11/16/2021, 10:47 PM