microscopic-room-90690
11/01/2022, 6:27 AM
source:
  type: s3
  config:
    platform: s3
    profiling:
      enabled: false
      profile_table_level_only: false
    path_specs:
      - include: "s3://path/cluster=dev/datatype={table}/year={partition[0]}/month={partition[1]}/day={partition[2]}/*.parquet"
    aws_config:
      aws_region: us-east-1
sink:
  type: "datahub-rest"
  config:
    server: "http://localhost:8080"
hundreds-photographer-13496
11/01/2022, 7:40 AMmicroscopic-room-90690
11/01/2022, 8:39 AM{table} part of url s3://path/cluster=dev/datatype={table}/year={partition[0]}/month={partition[1]}/day={partition[2]}/*.parquet
"fieldPath": "p95",
"jsonPath": null,
"nullable": false,
"description": null,
"created": null,
"lastModified": null,
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"globalTags": null,
"glossaryTerms": null,
"isPartOfKey": false,
"isPartitioningKey": null,
"jsonProps": null
},
{
"fieldPath": "p96",
"jsonPath": null,
"nullable": false,
"description": null,
"created": null,
"lastModified": null,
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"globalTags": null,
"glossaryTerms": null,
"isPartOfKey": false,
"isPartitioningKey": null,
"jsonProps": null
},
{
"fieldPath": "p97",
"jsonPath": null,
"nullable": false,
"description": null,
"created": null,
"lastModified": null,
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"globalTags": null,
"glossaryTerms": null,
"isPartOfKey": false,
"isPartitioningKey": null,
"jsonProps": null
},
{
"fieldPath": "p98",
"jsonPath": null,
"nullable": false,
"description": null,
"created": null,
"lastModified": null,
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"globalTags": null,
"glossaryTerms": null,
"isPartOfKey": false,
"isPartitioningKey": null,
"jsonProps": null
},
{
"fieldPath": "p99",
"jsonPath": null,
"nullable": false,
"description": null,
"created": null,
"lastModified": null,
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"globalTags": null,
"glossaryTerms": null,
"isPartOfKey": false,
"isPartitioningKey": null,
"jsonProps": null
},
{
"fieldPath": "region",
"jsonPath": null,
"nullable": false,
"description": null,
"created": null,
"lastModified": null,
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"globalTags": null,
"glossaryTerms": null,
"isPartOfKey": false,
"isPartitioningKey": null,
"jsonProps": null
},
{
"fieldPath": "rowKey",
"jsonPath": null,
"nullable": false,
"description": null,
"created": null,
"lastModified": null,
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"globalTags": null,
"glossaryTerms": null,
"isPartOfKey": false,
"isPartitioningKey": null,
"jsonProps": null
},
{
"fieldPath": "ts",
"jsonPath": null,
"nullable": false,
"description": null,
"created": null,
"lastModified": null,
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"globalTags": null,
"glossaryTerms": null,
"isPartOfKey": false,
"isPartitioningKey": null,
"jsonProps": null
},
{
"fieldPath": "year",
"jsonPath": null,
"nullable": false,
"description": null,
"created": null,
"lastModified": null,
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "string",
"recursive": false,
"globalTags": null,
"glossaryTerms": null,
"isPartOfKey": false,
"isPartitioningKey": null,
"jsonProps": null
}
],
"primaryKeys": null,
"foreignKeysSpecs": null,
"foreignKeys": null
}
}
]
}
},
"proposedDelta": null,
"systemMetadata": {
"lastObserved": 1667285121057,
"runId": "s3-2022_11_01-06_44_04",
"registryName": null,
"registryVersion": null,
"properties": null
}
},
{
"auditHeader": null,
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,path/cluster=dev/joinvoip,PROD)",
"entityKeyAspect": null,
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
"value": "{\"container\": \"urn:li:container:55b31a9bf2521237914e6ad52ccb5f4f\"}",
"contentType": "application/json"
},
"systemMetadata": {
"lastObserved": 1667285121089,
"runId": "s3-2022_11_01-06_44_04",
"registryName": null,
"registryVersion": null,
"properties": null
}
}microscopic-room-90690
11/01/2022, 9:26 AMhundreds-photographer-13496
11/01/2022, 10:23 AMhundreds-photographer-13496
11/01/2022, 10:29 AM"s3://path/cluster=dev/datatype={table}/year={partition[0]}/month={partition[1]}/day={partition[2]}/*.parquet"
versus
"s3://path/cluster=dev/{table}/year={partition[0]}/month={partition[1]}/day={partition[2]}/*.parquet"
Can you provide the absolute s3 path from which this dataset was created - urn:li:dataset:(urn:li:dataPlatform:s3,path/cluster=dev/joinvoip,PROD)?microscopic-room-90690
11/02/2022, 6:01 AMhundreds-photographer-13496
11/02/2022, 9:52 AMgray-shoe-75895
11/02/2022, 9:55 PMdatahub delete without --hard, the “soft-deleted” status might be sticking even though the ingestion was successful. A workaround for now would be to use the --hard delete flag so that everything is re-ingested completely freshmicroscopic-room-90690
11/03/2022, 1:58 AMdatahub delete --env PROD --entity_type container --platform s3 . And @gray-shoe-75895 it works with --hard !
I have another question. How can I recover metadata after a soft delete?microscopic-room-90690
11/03/2022, 2:55 AMgray-shoe-75895
11/03/2022, 8:02 AMgray-shoe-75895
11/03/2022, 8:03 AMprofile_table_level_only to true or disable it altogethermicroscopic-room-90690
11/03/2022, 8:24 AMhundreds-photographer-13496
11/03/2022, 8:45 AMremoved=True. So updating the status aspect for all s3 entities to set removed=False should make the metadata visible again.
Ideally, that should happen automatically if you run the s3 ingestion again. However, the s3 source currently does not emit the status aspect. It should be easy to fix the source; let me get back to you on this.