brave-businessperson-3969
04/06/2022, 10:40 AM
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
from datahub.metadata.schema_classes import (
SchemaMetadataClass,
EditableSchemaMetadataClass
)
gms_endpoint = https://<url_of_the_datahub_installation>/api/gms
dataset_urn = <some existing table urn in datahub> #e.g. "urn:li:dataset:(urn:li:dataPlatform:postgres,pagila.public.actor,PROD)"
datahub_uplink = DataHubGraph(config=DatahubClientConfig(server=gms_endpoint, token=datahub_token))
# The following call raises (from avrogen's
# `raise ValueError(f'{readers_schema.fullname} contains extra fields: {input_keys}')`):
# ValueError: com.linkedin.pegasus2avro.schema.Schemaless contains extra fields:
# {'com.linkedin.schema.MySqlDDL'}
schema_metadata = datahub_uplink.get_aspect_v2(
entity_urn=dataset_urn,
aspect="schemaMetadata",
aspect_type=SchemaMetadataClass,
)
# This works (querying a different aspect):
# schema_metadata = datahub_uplink.get_aspect_v2(
# entity_urn=dataset_urn,
# aspect="editableSchemaMetadata",
# aspect_type=EditableSchemaMetadataClass,
# )
Full error:
Traceback (most recent call last):
File "error.py", line 20, in <module>
schema_metadata = datahub_uplink.get_aspect_v2(
File "/home/uwest/venv/lib/python3.8/site-packages/datahub/ingestion/graph/client.py", line 148, in get_aspect_v2
return aspect_type.from_obj(aspect_json, tuples=True)
File "/home/uwest/venv/lib/python3.8/site-packages/avrogen/dict_wrapper.py", line 41, in from_obj
return conv.from_json_object(obj, cls.RECORD_SCHEMA)
File "/home/uwest/venv/lib/python3.8/site-packages/avrogen/avrojson.py", line 104, in from_json_object
return self._generic_from_json(json_obj, writers_schema, readers_schema)
File "/home/uwest/venv/lib/python3.8/site-packages/avrogen/avrojson.py", line 257, in _generic_from_json
result = self._record_from_json(json_obj, writers_schema, readers_schema)
File "/home/uwest/venv/lib/python3.8/site-packages/avrogen/avrojson.py", line 345, in _record_from_json
field_value = self._generic_from_json(json_obj[field.name], writers_field.type, field.type)
File "/home/uwest/venv/lib/python3.8/site-packages/avrogen/avrojson.py", line 255, in _generic_from_json
result = self._union_from_json(json_obj, writers_schema, readers_schema)
File "/home/uwest/venv/lib/python3.8/site-packages/avrogen/avrojson.py", line 313, in _union_from_json
return self._generic_from_json(json_obj, s, readers_schema)
File "/home/uwest/venv/lib/python3.8/site-packages/avrogen/avrojson.py", line 238, in _generic_from_json
return self._generic_from_json(json_obj, writers_schema, s)
File "/home/uwest/venv/lib/python3.8/site-packages/avrogen/avrojson.py", line 257, in _generic_from_json
result = self._record_from_json(json_obj, writers_schema, readers_schema)
File "/home/uwest/venv/lib/python3.8/site-packages/avrogen/avrojson.py", line 358, in _record_from_json
raise ValueError(f'{readers_schema.fullname} contains extra fields: {input_keys}')
ValueError: com.linkedin.pegasus2avro.schema.Schemaless contains extra fields: {'com.linkedin.schema.MySqlDDL'}
loud-island-88694
helpful-optician-78938
04/07/2022, 11:46 PM
Could you pass aspect_type_name="com.linkedin.schema.SchemaMetadata" to get_aspect_v2 and try? If that doesn't work, I'll work on a fix. In the meantime, you can unblock yourself using this alternate API, similar to the datahub CLI.
helpful-optician-78938
04/08/2022, 12:54 AM
brave-businessperson-3969
04/08/2022, 4:14 PM
brave-businessperson-3969
04/19/2022, 2:45 PM