Slackbot
06/27/2023, 8:03 AM
Sergio Ferragut
06/27/2023, 5:52 PM
Anant Sharma
06/28/2023, 7:25 AM
{
"id": "index_kafka_eber_gateways_sensors_data_5a0ebe22f44a3f5_cepalohc",
"groupId": "index_kafka_eber_gateways_sensors_data",
"type": "index_kafka",
"createdTime": "2023-06-28T06:53:01.032Z",
"queueInsertionTime": "1970-01-01T00:00:00.000Z",
"statusCode": "FAILED",
"status": "FAILED",
"runnerStatusCode": "WAITING",
"duration": -1,
"location": {
"host": "10.101.60.160",
"port": 8100,
"tlsPort": -1
},
"dataSource": "eber_gateways_sensors_data",
"errorMsg": "The worker that this task was assigned disappeared and did not report cleanup within timeout[PT15M]...."
}
and I can see errors on my coordinator node as well:
2023-06-28T04:05:45,454 ERROR [qtp1286172885-118] org.apache.druid.indexing.common.actions.SegmentAllocateAction - Could not allocate pending segment for rowInterval[2023-06-27T11:15:20.599Z/2023-06-27T11:15:20.600Z], segmentInterval[2023-06-26T00:00:00.000Z/2023-07-03T00:00:00.000Z].
2023-06-28T04:48:58,796 ERROR [Coordinator-Exec--0] org.apache.druid.server.coordinator.rules.LoadRule - Tier[_default_tier] has no servers! Check your cluster configuration!: {class=org.apache.druid.server.coordinator.rules.LoadRule}
Amatya Avadhanula
06/28/2023, 7:32 AM
Anant Sharma
06/28/2023, 7:44 AM
Anant Sharma
06/28/2023, 7:45 AM
Anant Sharma
06/28/2023, 7:46 AM
Anant Sharma
06/28/2023, 8:17 AM
Amatya Avadhanula
06/28/2023, 9:25 AM
2023-06-28T04:48:58,796 ERROR [Coordinator-Exec--0] org.apache.druid.server.coordinator.rules.LoadRule - Tier[_default_tier] has no servers! Check your cluster configuration!: {class=org.apache.druid.server.coordinator.rules.LoadRule}
There is a historical now, so I don't think this error should be present anymore. Could you confirm if the historical recently restarted?
Amatya Avadhanula
06/28/2023, 9:26 AM
"skipOffsetFromLatest": "PT1H",
This is not the segment granularity.
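For anyone reading along: these are two different settings set in two different places. skipOffsetFromLatest is a field of the datasource's auto-compaction config on the Coordinator, while segmentGranularity lives in the supervisor spec under dataSchema.granularitySpec. A minimal sketch of where each value quoted in this thread comes from (the "where" keys are labels for this sketch only, not Druid fields):
[
  { "where": "coordinator auto-compaction config", "skipOffsetFromLatest": "PT1H" },
  { "where": "supervisor dataSchema.granularitySpec", "segmentGranularity": "MONTH" }
]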
Amatya Avadhanula
06/28/2023, 9:29 AM
Anant Sharma
06/28/2023, 11:28 AM
So I recently set up compaction, where I put the above-mentioned "skipOffsetFromLatest": "PT1H"; before it was a week and I've changed it to an hour. Below is the supervisor conf, and there we have defined the granularity as MONTH. Are you sure this is because of the granularity? The exact same thing is running in my other environment and it's working fine, so what could possibly be causing the issue here?
{
"type": "kafka",
"spec": {
"dataSchema": {
"dataSource": "eber_vehicle_components_status",
"timestampSpec": {
"column": "timestamp",
"format": "millis",
"missingValue": null
},
"dimensionsSpec": {
"dimensions": [
{
"type": "string",
"name": "gateway_id",
"multiValueHandling": "SORTED_ARRAY",
"createBitmapIndex": true
},
{
"type": "string",
"name": "value",
"multiValueHandling": "SORTED_ARRAY",
"createBitmapIndex": true
},
{
"type": "float",
"name": "id",
"multiValueHandling": "SORTED_ARRAY",
"createBitmapIndex": false
}
],
"dimensionExclusions": [
"__time",
"timestamp"
],
"includeAllDimensions": false
},
"metricsSpec": [],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "MONTH",
"queryGranularity": {
"type": "none"
},
"rollup": true,
"intervals": []
},
"transformSpec": {
"filter": null,
"transforms": []
}
},
"ioConfig": {
"topic": "eber.vehicle.components.status.qc",
"inputFormat": {
"type": "json",
"flattenSpec": {
"useFieldDiscovery": true,
"fields": []
},
"keepNullColumns": true,
"assumeNewlineDelimited": false,
"useJsonNodeReader": false
},
"replicas": 1,
"taskCount": 1,
"taskDuration": "PT86400S",
"consumerProperties": {
"bootstrap.servers": "qc-kafka-kafka-bootstrap.kafka.svc.cluster.local:9092,",
"security.protocol": "SASL_PLAINTEXT",
"sasl.mechanism": "SCRAM-SHA-512",
"sasl.jaas.config": "org.apache.kafka.common.security.scram.ScramLoginModule required username='admin-etm-qc' password='xxx';",
"auto.offset.reset": "earliest"
},
"autoScalerConfig": null,
"pollTimeout": 100,
"startDelay": "PT5S",
"period": "PT30S",
"useEarliestOffset": true,
"completionTimeout": "PT1800S",
"lateMessageRejectionPeriod": null,
"earlyMessageRejectionPeriod": null,
"lateMessageRejectionStartDateTime": null,
"configOverrides": null,
"idleConfig": null,
"stream": "eber.vehicle.components.status.qc",
"useEarliestSequenceNumber": true,
"type": "kafka"
},
"tuningConfig": {
"type": "kafka",
"appendableIndexSpec": {
"type": "onheap",
"preserveExistingMetrics": false
},
"maxRowsInMemory": 1000000,
"maxBytesInMemory": 0,
"skipBytesInMemoryOverheadCheck": false,
"maxRowsPerSegment": 5000000,
"maxTotalRows": null,
"intermediatePersistPeriod": "PT10M",
"maxPendingPersists": 0,
"indexSpec": {
"bitmap": {
"type": "roaring",
"compressRunOnSerialization": true
},
"dimensionCompression": "lz4",
"stringDictionaryEncoding": {
"type": "utf8"
},
"metricCompression": "lz4",
"longEncoding": "longs"
},
"indexSpecForIntermediatePersists": {
"bitmap": {
"type": "roaring",
"compressRunOnSerialization": true
},
"dimensionCompression": "lz4",
"stringDictionaryEncoding": {
"type": "utf8"
},
"metricCompression": "lz4",
"longEncoding": "longs"
},
"reportParseExceptions": false,
"handoffConditionTimeout": 0,
"resetOffsetAutomatically": false,
"segmentWriteOutMediumFactory": null,
"workerThreads": null,
"chatThreads": null,
"chatRetries": 8,
"httpTimeout": "PT10S",
"shutdownTimeout": "PT80S",
"offsetFetchPeriod": "PT30S",
"intermediateHandoffPeriod": "P2147483647D",
"logParseExceptions": false,
"maxParseExceptions": 2147483647,
"maxSavedParseExceptions": 0,
"skipSequenceNumberAvailabilityCheck": false,
"repartitionTransitionDuration": "PT120S"
}
},
"context": null
}
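To make the interaction Anant is asking about concrete (the timestamps below are illustrative, not taken from the logs): with segmentGranularity set to MONTH, a row arriving at 2023-06-28T18:00Z has to be allocated into the month-long interval 2023-06-01/2023-07-01. Auto-compaction with skipOffsetFromLatest of PT1H is allowed to consider data as recent as 2023-06-28T17:00Z, which falls inside that same month, so the compaction task and the streaming task can end up contending for a lock on the very same interval, and the streaming task's segment allocation fails. A sketch of the overlap, written as plain data rather than a Druid config:
{
  "streamingTask": {
    "rowTimestamp": "2023-06-28T18:00:00Z",
    "segmentIntervalNeeded": "2023-06-01T00:00:00Z/2023-07-01T00:00:00Z"
  },
  "autoCompaction": {
    "skipOffsetFromLatest": "PT1H",
    "mayCompactUpTo": "2023-06-28T17:00:00Z",
    "candidateInterval": "2023-06-01T00:00:00Z/2023-07-01T00:00:00Z"
  }
}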
Amatya Avadhanula
06/28/2023, 11:33 AM
Anant Sharma
06/28/2023, 11:35 AM
Anant Sharma
06/28/2023, 11:35 AM
{
"dataSource": "eber_vehicle_components_status",
"taskPriority": 80,
"inputSegmentSizeBytes": 100000000000000,
"maxRowsPerSegment": null,
"skipOffsetFromLatest": "PT1H",
"tuningConfig": {
"maxRowsInMemory": null,
"appendableIndexSpec": null,
"maxBytesInMemory": null,
"maxTotalRows": null,
"splitHintSpec": null,
"partitionsSpec": {
"type": "dynamic",
"maxRowsPerSegment": 5000000,
"maxTotalRows": null
},
"indexSpec": null,
"indexSpecForIntermediatePersists": null,
"maxPendingPersists": null,
"pushTimeout": null,
"segmentWriteOutMediumFactory": null,
"maxNumConcurrentSubTasks": null,
"maxRetry": null,
"taskStatusCheckPeriodMs": null,
"chatHandlerTimeout": null,
"chatHandlerNumRetries": null,
"maxNumSegmentsToMerge": null,
"totalNumMergeTasks": null,
"maxColumnsToMerge": null,
"type": "index_parallel",
"forceGuaranteedRollup": false
},
"granularitySpec": null,
"dimensionsSpec": null,
"metricsSpec": null,
"transformSpec": null,
"ioConfig": null,
"taskContext": null
}
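If that interval contention is indeed what is happening, the usual mitigation is to keep auto-compaction well clear of the interval the Kafka tasks are still writing into, i.e. make skipOffsetFromLatest comfortably larger than the segment granularity plus the stream's expected lateness. A minimal sketch of that change against the compaction config Anant posted; the P40D value is only an illustrative assumption (a bit more than one MONTH segment), not a recommendation from this thread:
{
  "dataSource": "eber_vehicle_components_status",
  "skipOffsetFromLatest": "P40D",
  "tuningConfig": {
    "type": "index_parallel",
    "partitionsSpec": {
      "type": "dynamic",
      "maxRowsPerSegment": 5000000
    }
  }
}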
Amatya Avadhanula
06/28/2023, 11:41 AM
Anant Sharma
06/28/2023, 2:31 PM
Amatya Avadhanula
06/28/2023, 5:12 PM
Amatya Avadhanula
06/28/2023, 5:13 PM
Anant Sharma
06/28/2023, 5:19 PM
Anant Sharma
06/28/2023, 5:21 PM
Amatya Avadhanula
06/28/2023, 5:23 PM
and do you think this is the only issue, or could it be something else too?
Do you observe any other issues with ingestion for datasources having similar volume + task count? If not, I think this could be the only issue.
Anant Sharma
06/28/2023, 5:31 PM
Amatya Avadhanula
06/28/2023, 5:32 PM
Anant Sharma
06/28/2023, 5:33 PM
Anant Sharma
06/28/2023, 5:39 PM
Anant Sharma
06/28/2023, 5:40 PM
Amatya Avadhanula
06/28/2023, 5:41 PM
Amatya Avadhanula
06/28/2023, 5:42 PM
Anant Sharma
06/28/2023, 5:47 PM
Anant Sharma
06/28/2023, 5:48 PM
Amatya Avadhanula
06/28/2023, 5:48 PM
Anant Sharma
06/28/2023, 5:50 PM
Amatya Avadhanula
06/28/2023, 5:50 PM
Anant Sharma
06/28/2023, 5:51 PM
Anant Sharma
06/28/2023, 5:51 PM
Amatya Avadhanula
06/28/2023, 5:51 PM
Anant Sharma
06/28/2023, 5:54 PM
Amatya Avadhanula
06/28/2023, 5:54 PM
Anant Sharma
06/28/2023, 5:54 PM
Anant Sharma
06/28/2023, 5:57 PM
Amatya Avadhanula
06/28/2023, 5:57 PM
Anant Sharma
06/28/2023, 5:59 PM
Amatya Avadhanula
06/28/2023, 5:59 PM
Anant Sharma
06/28/2023, 6:03 PM
Amatya Avadhanula
06/28/2023, 6:04 PM
Amatya Avadhanula
06/28/2023, 6:05 PM
Anant Sharma
06/28/2023, 6:06 PM
Amatya Avadhanula
06/28/2023, 6:07 PM
Anant Sharma
06/28/2023, 6:09 PM
2023-06-28T18:01:36,648 ERROR [forking-task-runner-13] org.apache.druid.indexing.overlord.ForkingTaskRunner - Process exited with code[137] for task: index_parallel_eber_vehicle_components_status_beemdiaf_2023-06-28T18:01:17.205Z
but there are only 3 pods running and I've set the maximum replicas to 5, so it doesn't seem to be an issue with resources
Anant Sharma
06/28/2023, 6:10 PM
{
"id": "index_parallel_eber_vehicle_components_status_beemdiaf_2023-06-28T18:01:17.205Z",
"groupId": "index_parallel_eber_vehicle_components_status_beemdiaf_2023-06-28T18:01:17.205Z",
"type": "index_parallel",
"createdTime": "2023-06-28T18:01:17.206Z",
"queueInsertionTime": "1970-01-01T00:00:00.000Z",
"statusCode": "FAILED",
"status": "FAILED",
"runnerStatusCode": "WAITING",
"duration": 19295,
"location": {
"host": "10.101.42.175",
"port": 8101,
"tlsPort": -1
},
"dataSource": "eber_vehicle_components_status",
"errorMsg": "Task execution process exited unsuccessfully with code[137]. See middleManager logs for more details..."
}
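A note on the failure mode here: exit code 137 is 128 + 9, i.e. the task's JVM was killed with SIGKILL, and on Kubernetes that most often means the container was OOM-killed for exceeding its memory limit rather than the cluster running out of pods, so it can still be a resource problem even with only 3 of 5 replicas in use. If the compaction task is the one being killed, the memory-related fields that are currently null in the compaction tuningConfig above are one place to push back; raising the MiddleManager pod's memory limit or lowering the peon heap would be the other side of the same trade-off. A sketch with explicit values; the numbers are illustrative assumptions, not tuned recommendations:
{
  "tuningConfig": {
    "type": "index_parallel",
    "maxRowsInMemory": 150000,
    "maxBytesInMemory": 100000000,
    "maxNumConcurrentSubTasks": 1,
    "partitionsSpec": {
      "type": "dynamic",
      "maxRowsPerSegment": 5000000
    }
  }
}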
Amatya Avadhanula
06/28/2023, 6:10 PM
Anant Sharma
06/28/2023, 6:16 PM
Anant Sharma
06/28/2023, 6:19 PM
Anant Sharma
06/28/2023, 6:26 PM
Anant Sharma
06/28/2023, 6:27 PM
Anant Sharma
06/28/2023, 6:38 PM
{
"id": "index_kafka_eber_vehicle_components_status_7825e7f874a89fb_eddnapjj",
"groupId": "index_kafka_eber_vehicle_components_status",
"type": "index_kafka",
"createdTime": "2023-06-28T18:35:37.814Z",
"queueInsertionTime": "1970-01-01T00:00:00.000Z",
"statusCode": "FAILED",
"status": "FAILED",
"runnerStatusCode": "WAITING",
"duration": 12809,
"location": {
"host": "10.101.34.114",
"port": 8100,
"tlsPort": -1
},
"dataSource": "eber_vehicle_components_status",
"errorMsg": "org.apache.druid.java.util.common.ISE: Could not allocate segment for row with timestamp[2023-06-28T..."