Slackbot
06/25/2023, 3:23 PM
Abhishek Balaji Radhakrishnan
06/25/2023, 9:34 PM
`UNHEALTHY_TASKS` or `UNHEALTHY_SUPERVISOR` could mean a number of different things. Do you see any errors in your supervisor? Can you share the overlord logs or relevant stack traces?
> can it be because of resource i provide
Yes, it can be. Have you checked that the supervisor's tuning config is good? Specifically, do `maxRowsInMemory` and `maxBytesInMemory` look sane with respect to the JVM settings? If the supervisor can't accommodate these thresholds, it'll OOM and go into an unhealthy state. The config and logs/stack traces would be helpful here.
> increased the number of retention period for kafka
What are the configured `taskCount` and `taskDuration`? If the supervisor can't keep up with the offered load on the input topic, so that the offsets it needs fall outside the retention period, you might want to tweak the former setting. Understanding your lag patterns can help.
Anant Sharma
06/26/2023, 4:17 AM
{
"type": "kafka",
"spec": {
"dataSchema": {
"dataSource": "eberspacher_gateway_sensor",
"timestampSpec": {
"column": "timestamp",
"format": "auto",
"missingValue": null
},
"dimensionsSpec": {
"dimensions": [
{
"type": "string",
"name": "gw_id",
"multiValueHandling": "SORTED_ARRAY",
"createBitmapIndex": true
},
{
"type": "string",
"name": "type",
"multiValueHandling": "SORTED_ARRAY",
"createBitmapIndex": true
},
{
"type": "double",
"name": "value",
"multiValueHandling": "SORTED_ARRAY",
"createBitmapIndex": false
}
],
"dimensionExclusions": [
"__time",
"count",
"timestamp"
],
"includeAllDimensions": false
},
"metricsSpec": [
{
"type": "count",
"name": "count"
}
],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "MONTH",
"queryGranularity": {
"type": "none"
},
"rollup": true,
"intervals": []
},
"transformSpec": {
"filter": null,
"transforms": []
}
},
"ioConfig": {
"topic": "eberspacher-gateway-sensor-data-qc",
"inputFormat": {
"type": "json",
"flattenSpec": {
"useFieldDiscovery": true,
"fields": []
},
"keepNullColumns": true,
"assumeNewlineDelimited": false,
"useJsonNodeReader": false
},
"replicas": 1,
"taskCount": 1,
"taskDuration": "PT604800S",
"consumerProperties": {
"bootstrap.servers": "kafka-kafka-bootstrap.kafka.svc.cluster.local:9092,",
"security.protocol": "SASL_PLAINTEXT",
"sasl.mechanism": "SCRAM-SHA-512",
"sasl.jaas.config": "org.apache.kafka.common.security.scram.ScramLoginModule required username='admin-etm-qc' password='xxxxxx';",
"auto.offset.reset": "earliest"
},
"autoScalerConfig": null,
"pollTimeout": 100,
"startDelay": "PT5S",
"period": "PT30S",
"useEarliestOffset": true,
"completionTimeout": "PT1800S",
"lateMessageRejectionPeriod": null,
"earlyMessageRejectionPeriod": null,
"lateMessageRejectionStartDateTime": null,
"configOverrides": null,
"idleConfig": null,
"stream": "eberspacher-gateway-sensor-data-qc",
"useEarliestSequenceNumber": true,
"type": "kafka"
},
"tuningConfig": {
"type": "kafka",
"appendableIndexSpec": {
"type": "onheap",
"preserveExistingMetrics": false
},
"maxRowsInMemory": 1000000,
"maxBytesInMemory": 0,
"skipBytesInMemoryOverheadCheck": false,
"maxRowsPerSegment": 5000000,
"maxTotalRows": null,
"intermediatePersistPeriod": "PT10M",
"maxPendingPersists": 0,
"indexSpec": {
"bitmap": {
"type": "roaring",
"compressRunOnSerialization": true
},
"dimensionCompression": "lz4",
"stringDictionaryEncoding": {
"type": "utf8"
},
"metricCompression": "lz4",
"longEncoding": "longs"
},
"indexSpecForIntermediatePersists": {
"bitmap": {
"type": "roaring",
"compressRunOnSerialization": true
},
"dimensionCompression": "lz4",
"stringDictionaryEncoding": {
"type": "utf8"
},
"metricCompression": "lz4",
"longEncoding": "longs"
},
"reportParseExceptions": false,
"handoffConditionTimeout": 0,
"resetOffsetAutomatically": true,
"segmentWriteOutMediumFactory": null,
"workerThreads": null,
"chatThreads": null,
"chatRetries": 8,
"httpTimeout": "PT10S",
"shutdownTimeout": "PT80S",
"offsetFetchPeriod": "PT30S",
"intermediateHandoffPeriod": "P2147483647D",
"logParseExceptions": false,
"maxParseExceptions": 2147483647,
"maxSavedParseExceptions": 0,
"skipSequenceNumberAvailabilityCheck": false,
"repartitionTransitionDuration": "PT120S"
}
},
"context": null
}
Anant Sharma
06/26/2023, 10:07 AM
"maxRowsInMemory": 5000000 -> "maxRowsInMemory": 10000000
"maxBytesInMemory": 0 -> "maxBytesInMemory": 1073741824 (1 GiB)
"taskCount": 1 -> "taskCount": 3
"taskDuration": "PT604800S" -> "taskDuration": "PT3600S" (1 hour)
Anant Sharma
06/26/2023, 10:21 AM
Config:
DRUID_XMX: 2048m
DRUID_XMS: 2048m
for the coordinator and historical nodes.
Getting the below error:
{
"dataSource": "eberspacher_gateway_sensor",
"stream": "eberspacher-gateway-sensor-data-qc",
"partitions": 3,
"replicas": 1,
"durationSeconds": 3600,
"activeTasks": [
{
"id": "index_kafka_eberspacher_gateway_sensor_73f8efb6c3a7c4c_ipefgjof",
"startingOffsets": {
"1": 638876
},
"startTime": "2023-06-26T10:20:43.717Z",
"remainingSeconds": 3550,
"type": "ACTIVE",
"currentOffsets": {},
"lag": {}
},
{
"id": "index_kafka_eberspacher_gateway_sensor_2d49d78205e884a_hpnfclcb",
"startingOffsets": {
"2": 638231
},
"startTime": "2023-06-26T10:20:43.333Z",
"remainingSeconds": 3549,
"type": "ACTIVE",
"currentOffsets": {},
"lag": {}
}
],
"publishingTasks": [],
"latestOffsets": {
"0": 641043,
"1": 638988,
"2": 638364
},
"minimumLag": {
"0": 109,
"1": 112,
"2": 133
},
"aggregateLag": 354,
"offsetsLastUpdated": "2023-06-26T10:21:13.461Z",
"suspended": false,
"healthy": false,
"state": "UNHEALTHY_TASKS",
"detailedState": "UNHEALTHY_TASKS",
"recentErrors": []
}
Abhishek Balaji Radhakrishnan
06/27/2023, 1:41 AM
Anant Sharma
06/27/2023, 4:48 AM
Abhishek Balaji Radhakrishnan
06/27/2023, 5:09 AM
Anant Sharma
06/27/2023, 7:00 AM
{
"id": "index_kafka_eber_vehicles_gps_b4db3094603d03e_kfpeaenj",
"groupId": "index_kafka_eber_vehicles_gps",
"type": "index_kafka",
"createdTime": "2023-06-27T06:56:51.968Z",
"queueInsertionTime": "1970-01-01T00:00:00.000Z",
"statusCode": "FAILED",
"status": "FAILED",
"runnerStatusCode": "WAITING",
"duration": 73097,
"location": {
"host": "10.101.34.67",
"port": 8101,
"tlsPort": -1
},
"dataSource": "eber_vehicles_gps",
"errorMsg": "org.apache.druid.java.util.common.ISE: Could not allocate segment for row with timestamp[2023-06-26T..."
Anant Sharma
06/27/2023, 7:00 AM