jolly-book-3043
11/18/2022, 9:38 PMSuccessfully submitted ingestion execution request!
but nothing is triggered. i’ve restarted all containers and still no dice. any tips?red-waitress-53338
11/20/2022, 12:13 AMcurl --location --request POST 'http://localhost:8080/entities?action=search' \
--header 'X-RestLi-Protocol-Version: 2.0.0' \
--header 'Content-Type: application/json' \
--data-raw '{
    "input": "*",
    "entity": "dataset",
    "start": 0,
    "count": 1000
}'
Getting the error:
{
"exceptionClass": "com.linkedin.restli.server.RestLiServiceException",
"stackTrace": "com.linkedin.restli.server.RestLiServiceException [HTTP Status:404]: No root resource defined for path '/entities'\n\tat com.linkedin.restli.server.RestLiServiceException.fromThrowable(RestLiServiceException.java:315)\n\tat com.linkedin.restli.server.BaseRestLiServer.buildPreRoutingError(BaseRestLiServer.java:158)\n\tat com.linkedin.restli.server.RestRestLiServer.buildPreRoutingRestException(RestRestLiServer.java:203)\n\tat com.linkedin.restli.server.RestRestLiServer.handleResourceRequest(RestRestLiServer.java:177)\n\tat com.linkedin.restli.server.RestRestLiServer.doHandleRequest(RestRestLiServer.java:164)\n\tat com.linkedin.restli.server.RestRestLiServer.handleRequest(RestRestLiServer.java:120)\n\tat com.linkedin.restli.server.RestLiServer.handleRequest(RestLiServer.java:132)\n\tat com.linkedin.restli.server.DelegatingTransportDispatcher.handleRestRequest(DelegatingTransportDispatcher.java:70)\n\tat com.linkedin.r2.filter.transport.DispatcherRequestFilter.onRestRequest(DispatcherRequestFilter.java:70)\n\tat com.linkedin.r2.filter.TimedRestFilter.onRestRequest(TimedRestFilter.java:72)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:146)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:132)\n\tat com.linkedin.r2.filter.FilterChainIterator.onRequest(FilterChainIterator.java:62)\n\tat com.linkedin.r2.filter.TimedNextFilter.onRequest(TimedNextFilter.java:55)\n\tat com.linkedin.r2.filter.transport.ServerQueryTunnelFilter.onRestRequest(ServerQueryTunnelFilter.java:58)\n\tat com.linkedin.r2.filter.TimedRestFilter.onRestRequest(TimedRestFilter.java:72)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:146)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:132)\n\tat 
com.linkedin.r2.filter.FilterChainIterator.onRequest(FilterChainIterator.java:62)\n\tat com.linkedin.r2.filter.TimedNextFilter.onRequest(TimedNextFilter.java:55)\n\tat com.linkedin.r2.filter.message.rest.RestFilter.onRestRequest(RestFilter.java:50)\n\tat com.linkedin.r2.filter.TimedRestFilter.onRestRequest(TimedRestFilter.java:72)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:146)\n\tat com.linkedin.r2.filter.FilterChainIterator$FilterChainRestIterator.doOnRequest(FilterChainIterator.java:132)\n\tat com.linkedin.r2.filter.FilterChainIterator.onRequest(FilterChainIterator.java:62)\n\tat com.linkedin.r2.filter.FilterChainImpl.onRestRequest(FilterChainImpl.java:96)\n\tat com.linkedin.r2.filter.transport.FilterChainDispatcher.handleRestRequest(FilterChainDispatcher.java:75)\n\tat com.linkedin.r2.util.finalizer.RequestFinalizerDispatcher.handleRestRequest(RequestFinalizerDispatcher.java:61)\n\tat com.linkedin.r2.transport.http.server.HttpDispatcher.handleRequest(HttpDispatcher.java:101)\n\tat com.linkedin.r2.transport.http.server.AbstractR2Servlet.service(AbstractR2Servlet.java:105)\n\tat javax.servlet.http.HttpServlet.service(HttpServlet.java:790)\n\tat com.linkedin.restli.server.spring.ParallelRestliHttpRequestHandler.handleRequest(ParallelRestliHttpRequestHandler.java:61)\n\tat org.springframework.web.context.support.HttpRequestHandlerServlet.service(HttpRequestHandlerServlet.java:73)\n\tat javax.servlet.http.HttpServlet.service(HttpServlet.java:790)\n\tat org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:852)\n\tat org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:544)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)\n\tat org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:536)\n\tat org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)\n\tat 
org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:235)\n\tat org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1581)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:233)\n\tat org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1307)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:188)\n\tat org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:482)\n\tat org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1549)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:186)\n\tat org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1204)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)\n\tat org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:221)\n\tat org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:146)\n\tat org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)\n\tat org.eclipse.jetty.server.Server.handle(Server.java:494)\n\tat org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:374)\n\tat org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:268)\n\tat org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311)\n\tat org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:103)\n\tat org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:117)\n\tat org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:336)\n\tat org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:313)\n\tat org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:171)\n\tat org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:129)\n\tat 
org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:367)\n\tat org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:782)\n\tat org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:918)\n\tat java.lang.Thread.run(Thread.java:748)\nCaused by: com.linkedin.restli.server.RoutingException: No root resource defined for path '/entities'\n\tat com.linkedin.restli.internal.server.RestLiRouter.process(RestLiRouter.java:139)\n\tat com.linkedin.restli.server.BaseRestLiServer.getRoutingResult(BaseRestLiServer.java:139)\n\tat com.linkedin.restli.server.RestRestLiServer.handleResourceRequest(RestRestLiServer.java:173)\n\t... 62 more\n",
"message": "No root resource defined for path '/entities'",
"status": 404
}
late-ability-59580
11/20/2022, 2:22 PMbillowy-pilot-93812
11/21/2022, 4:58 AMrich-policeman-92383
11/21/2022, 6:37 AMmodern-artist-55754
11/21/2022, 7:49 AMmillions-carpet-50697
11/21/2022, 8:03 AM2022-11-21 03:10:16,113 ERROR [spark-listener-group-shared] spark.DatahubSparkListener (DatahubSparkListener.java:onOtherEvent(273)): java.lang.NullPointerException
at datahub.spark.DatasetExtractor.lambda$static$6(DatasetExtractor.java:147)
at datahub.spark.DatasetExtractor.asDataset(DatasetExtractor.java:237)
at datahub.spark.DatahubSparkListener$SqlStartTask.run(DatahubSparkListener.java:114)
at datahub.spark.DatahubSparkListener.processExecution(DatahubSparkListener.java:350)
at datahub.spark.DatahubSparkListener.onOtherEvent(DatahubSparkListener.java:262)
at org.apache.spark.scheduler.SparkListenerBus.doPostEvent(SparkListenerBus.scala:100)
at org.apache.spark.scheduler.SparkListenerBus.doPostEvent$(SparkListenerBus.scala:28)
at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:37)
at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:37)
at org.apache.spark.util.ListenerBus.postToAll(ListenerBus.scala:117)
at org.apache.spark.util.ListenerBus.postToAll$(ListenerBus.scala:101)
at org.apache.spark.scheduler.AsyncEventQueue.super$postToAll(AsyncEventQueue.scala:105)
at org.apache.spark.scheduler.AsyncEventQueue.$anonfun$dispatch$1(AsyncEventQueue.scala:105)
at scala.runtime.java8.JFunction0$mcJ$sp.apply(JFunction0$mcJ$sp.java:12)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
at org.apache.spark.scheduler.AsyncEventQueue.org$apache$spark$scheduler$AsyncEventQueue$$dispatch(AsyncEventQueue.scala:100)
at org.apache.spark.scheduler.AsyncEventQueue$$anon$2.$anonfun$run$1(AsyncEventQueue.scala:96)
at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1381)
at org.apache.spark.scheduler.AsyncEventQueue$$anon$2.run(AsyncEventQueue.scala:96)
2. UnsatisfiedLinkError
Exception in thread "map-output-dispatcher-0" java.lang.UnsatisfiedLinkError: com.github.luben.zstd.Zstd.setCompressionLevel(JI)I
at com.github.luben.zstd.Zstd.setCompressionLevel(Native Method)
at com.github.luben.zstd.ZstdOutputStream.<init>(ZstdOutputStream.java:67)
at org.apache.spark.io.ZStdCompressionCodec.compressedOutputStream(CompressionCodec.scala:223)
at org.apache.spark.MapOutputTracker$.serializeMapStatuses(MapOutputTracker.scala:903)
at org.apache.spark.ShuffleStatus.$anonfun$serializedMapStatus$2(MapOutputTracker.scala:233)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:12)
at org.apache.spark.ShuffleStatus.withWriteLock(MapOutputTracker.scala:72)
at org.apache.spark.ShuffleStatus.serializedMapStatus(MapOutputTracker.scala:230)
at org.apache.spark.MapOutputTrackerMaster$MessageLoop.run(MapOutputTracker.scala:466)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
And I found a Git issue exactly match the error I met, but seems it would not be fixed. https://github.com/datahub-project/datahub/issues/5979.
Could anyone help? Thanks a lot.cool-tiger-42613
11/21/2022, 8:43 AMTime
is updated as current datetime rather than what I give as input. Can I get help on this?average-baker-96343
11/21/2022, 8:48 AMaverage-baker-96343
11/21/2022, 8:53 AMcreamy-pizza-80433
11/21/2022, 11:44 AMmammoth-gigabyte-6392
11/21/2022, 12:48 PMrich-policeman-92383
11/21/2022, 1:09 PMhigh-hospital-85984
11/21/2022, 5:33 PMeditableSchemaMetadata
but then we basically remove the ability to make UI edits (as they might get over written). It’s not a huge deal for us, but the approach feels like a hack. Any better ideas?lively-dusk-19162
11/21/2022, 6:09 PMlively-dusk-19162
11/21/2022, 6:16 PMlittle-breakfast-38102
11/21/2022, 6:33 PMlemon-musician-50603
11/21/2022, 8:04 PMminiature-painting-28571
11/21/2022, 10:19 PMminiature-painting-28571
11/21/2022, 10:19 PMmodern-artist-55754
11/22/2022, 1:05 AM_dbt_tmp
table and the destination table.
Is there any way to get around that? Before column level lineage, we just suppressed the _dbt_tmp
tables completely, and used dbt for the table lineage, but now with column level lineage the _dbt_tmp
tables show up again and column level lineage isn’t working because dbt has not been supported.ancient-policeman-73437
11/22/2022, 7:23 AMlimited-forest-73733
11/22/2022, 7:45 AMsteep-family-13549
11/22/2022, 10:16 AMkind-scientist-44426
11/22/2022, 10:57 AM[2022-11-22 10:46:51,215] ERROR {datahub.ingestion.source.bigquery_v2.bigquery:557} - Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/datahub/ingestion/source/bigquery_v2/bigquery.py", line 554, in _process_project
yield from self._process_schema(conn, project_id, bigquery_dataset)
File "/usr/local/lib/python3.10/site-packages/datahub/ingestion/source/bigquery_v2/bigquery.py", line 604, in _process_schema
yield from self._process_table(conn, table, project_id, dataset_name)
File "/usr/local/lib/python3.10/site-packages/datahub/ingestion/source/bigquery_v2/bigquery.py", line 650, in _process_table
for wu in table_workunits:
File "/usr/local/lib/python3.10/site-packages/datahub/ingestion/source/bigquery_v2/bigquery.py", line 720, in gen_table_dataset_workunits
custom_properties["time_partitioning"] = str(str(table.time_partitioning))
File "/usr/local/lib/python3.10/site-packages/google/cloud/bigquery/table.py", line 2659, in __repr__
key_vals = ["{}={}".format(key, val) for key, val in self._key()]
File "/usr/local/lib/python3.10/site-packages/google/cloud/bigquery/table.py", line 2635, in _key
properties["type_"] = repr(properties.pop("type"))
KeyError: 'type'
[2022-11-22 10:46:51,215] ERROR {datahub.ingestion.source.bigquery_v2.bigquery:558} - Unable to get tables for dataset DB in project project51, skipping. The error was: 'type'
Can someone help with it.dazzling-park-96517
11/22/2022, 11:57 AM'--- Logging error ---\n'
'Traceback (most recent call last):\n'
' File "/tmp/datahub/ingest/venv-trino-0.9.1/lib/python3.10/site-packages/datahub/ingestion/run/pipeline.py", line 362, in run\n'
' for record_envelope in self.transform(record_envelopes):\n'
' File "/tmp/datahub/ingest/venv-trino-0.9.1/lib/python3.10/site-packages/datahub/ingestion/extractor/mce_extractor.py", line 76, in '
'get_records\n'
' raise ValueError(\n'
"ValueError: source produced an invalid metadata work unit: MetadataChangeEventClass({'auditHeader': None, 'proposedSnapshot': "
"DatasetSnapshotClass({'urn': 'urn:li:dataset:(urn:li:dataPlatform:trino,catalog.schema
.test_table
,PROD)', 'aspects': "
"[StatusClass({'removed': False}), DatasetPropertiesClass({'customProperties': {'table_name': 'test_table', 'comment': None}, "
"'externalUrl': None, 'name': 'test_table
', 'qualifiedName': None, 'description': None, 'uri': None, 'tags': []}), "
"SchemaMetadataClass({'schemaName': 'catalog
.schema.test_table', 'platform': 'urn:li:dataPlatform:trino', 'version': 0, "
"'created': AuditStampClass({'time': 0, 'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'message': None}), 'lastModified': "
"AuditStampClass({'time': 0, 'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'message': None}), 'deleted': None, 'dataset': "
"None, 'cluster': None, 'hash': '', 'platformSchema': MySqlDDLClass({'tableSchema': ''}), 'fields': [SchemaFieldClass({'fieldPath': "
"'name', 'jsonPath': None, 'nullable': True, 'description': None, 'label': None, 'created': None, 'lastModified': None, 'type': "
"SchemaFieldDataTypeClass({'type': StringTypeClass({})}), 'nativeDataType': 'VARCHAR()', 'recursive': False, 'globalTags': None, "
"'glossaryTerms': None, 'isPartOfKey': False, 'isPartitioningKey': None, 'jsonProps': None}), SchemaFieldClass({'fieldPath': 'lastname', "
"'jsonPath': None, 'nullable': True, 'description': None, 'label': None, 'created': None, 'lastModified': None, 'type': "
…………
' File "/usr/local/lib/python3.10/logging/__init__.py", line 368, in getMessage\n'
' msg = msg % self.args\n'
'TypeError: not all arguments converted during string formatting\n'
'Call stack:\n'
Seems like the data cannot be read correctly…
Thanks in advance for answers and tips.alert-fall-82501
11/22/2022, 12:12 PMcolossal-smartphone-90274
11/22/2022, 3:42 PMhappy-notebook-43808
11/22/2022, 9:29 PMsource:
type: mssql
config:
...
# Options
use_odbc: "True"
uri_args:
driver: "ODBC Driver 17 for SQL Server"
Encrypt: "yes"
TrustServerCertificate: "Yes"
ssl: "True"
profiling:
enabled: true
limit: 100000
report_dropped_profiles: false
profile_table_level_only: false
include_field_null_count: true
include_field_min_value: true
include_field_max_value: true
include_field_mean_value: true
include_field_median_value: true
include_field_stddev_value: true
include_field_quantiles: true
include_field_distinct_value_frequencies: true
include_field_sample_values: true
turn_off_expensive_profiling_metrics: false
include_field_histogram: true
catch_exceptions: false
max_workers: 4
query_combiner_enabled: true
max_number_of_fields_to_profile: 100
profile_if_updated_since_days: null
partition_profiling_enabled: false
bland-lighter-26751
11/22/2022, 11:53 PM