Vatsal Agrawal
08/29/2025, 5:28 AM
Deepak Padhi
08/29/2025, 10:04 AM
Deepak Padhi
08/29/2025, 10:04 AM
Rajkumar
08/30/2025, 6:47 PM
Rajkumar
08/30/2025, 6:47 PM
{
"tableName": "kafka_test_1",
"tableType": "REALTIME",
"tenants": {
"broker": "DefaultTenant",
"server": "DefaultTenant",
"tagOverrideConfig": {}
},
"segmentsConfig": {
"timeColumnName": "time",
"replication": "1",
"replicasPerPartition": "1",
"retentionTimeUnit": null,
"retentionTimeValue": null,
"completionConfig": null,
"crypterClassName": null,
"peerSegmentDownloadScheme": null,
"schemaName": "kafka_test"
},
"tableIndexConfig": {
"loadMode": "MMAP",
"invertedIndexColumns": [],
"createInvertedIndexDuringSegmentGeneration": false,
"rangeIndexColumns": [],
"sortedColumn": [],
"bloomFilterColumns": [],
"bloomFilterConfigs": null,
"noDictionaryColumns": [],
"onHeapDictionaryColumns": [],
"varLengthDictionaryColumns": [],
"enableDefaultStarTree": false,
"starTreeIndexConfigs": null,
"enableDynamicStarTreeCreation": false,
"segmentPartitionConfig": null,
"columnMinMaxValueGeneratorMode": null,
"aggregateMetrics": false,
"nullHandlingEnabled": false,
"streamConfigs": {
"streamType": "kafka",
"stream.kafka.topic.name": "PINOT.TEST",
"stream.kafka.consumer.type": "lowlevel",
"stream.kafka.broker.list": "{}",
"stream.kafka.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kafka30.KafkaConsumerFactory",
"stream.kafka.security.protocol": "SASL_SSL",
"stream.kafka.sasl.mechanism": "OAUTHBEARER",
"stream.kafka.sasl.login.callback.handler.class": "org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginCallbackHandler",
"stream.kafka.sasl.oauthbearer.token.endpoint.url": "{url}",
"stream.kafka.sasl.jaas.config": "org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule required clientId='{}' clientSecret='{}' scope='' extension_logicalCluster='{}' extension_identityPoolId='{}';",
"stream.kafka.ssl.endpoint.identification.algorithm": "https",
"stream.kafka.consumer.prop.group.id": "{}",
"stream.kafka.consumer.prop.auto.offset.reset": "earliest",
"<http://stream.kafka.consumer.prop.request.timeout.ms|stream.kafka.consumer.prop.request.timeout.ms>": "60000",
"<http://stream.kafka.consumer.prop.metadata.max.age.ms|stream.kafka.consumer.prop.metadata.max.age.ms>": "60000",
"stream.kafka.decoder.class.name": "org.apache.pinot.plugin.stream.kafka.KafkaAvroMessageDecoder",
"stream.kafka.decoder.prop.schema.registry.url": "https://{}.westeurope.azure.confluent.cloud",
"stream.kafka.decoder.prop.schema.registry.basic.auth.credentials.source": "USER_INFO",
"<http://stream.kafka.decoder.prop.schema.registry.basic.auth.user.info|stream.kafka.decoder.prop.schema.registry.basic.auth.user.info>": "{key}:{secret}"
}
},
"metadata": {},
"ingestionConfig": {
"filterConfig": null,
"transformConfigs": null
},
"quota": {
"storage": null,
"maxQueriesPerSecond": null
},
"task": null,
"routing": {
"segmentPrunerTypes": null,
"instanceSelectorType": null
},
"query": {
"timeoutMs": null
},
"fieldConfigList": null,
"upsertConfig": null,
"tierConfigs": null
}
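A note on the decoder in the config above: stream.kafka.decoder.class.name points at the plain KafkaAvroMessageDecoder while the surrounding properties are schema-registry ones. If the topic carries Confluent-serialized Avro (magic byte plus schema ID), the decoder to try is the Confluent-aware one that appears later in this thread; a sketch of just the decoder lines, everything else unchanged, and note the rest.url key used in its documentation:

"stream.kafka.decoder.class.name": "org.apache.pinot.plugin.inputformat.avro.confluent.KafkaConfluentSchemaRegistryAvroMessageDecoder",
"stream.kafka.decoder.prop.schema.registry.rest.url": "https://{}.westeurope.azure.confluent.cloud",
"stream.kafka.decoder.prop.schema.registry.basic.auth.credentials.source": "USER_INFO",
"stream.kafka.decoder.prop.schema.registry.basic.auth.user.info": "{key}:{secret}"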
Rajkumar
09/01/2025, 10:53 AM
Rajkumar
09/01/2025, 10:55 AM
"streamType": "kafka",
"stream.kafka.topic.name": "asdas",
"stream.kafka.consumer.type": "lowlevel",
"stream.kafka.broker.list": "asasds.westeurope.azure.confluent.cloud:9092",
"stream.kafka.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
"security.protocol": "SASL_SSL",
"sasl.mechanism": "PLAIN",
"sasl.jaas.config": "org.apache.kafka.common.security.plain.PlainLoginModule required username=\"\" password=\"\";",
"ssl.endpoint.identification.algorithm": "https",
"auto.offset.reset": "earliest",
"<http://stream.kafka.consumer.prop.request.timeout.ms|stream.kafka.consumer.prop.request.timeout.ms>": "60000",
"<http://stream.kafka.consumer.prop.metadata.max.age.ms|stream.kafka.consumer.prop.metadata.max.age.ms>": "60000",
"stream.kafka.decoder.class.name": "org.apache.pinot.plugin.inputformat.avro.confluent.KafkaConfluentSchemaRegistryAvroMessageDecoder",
"stream.kafka.decoder.prop.schema.registry.rest.url": "<https://dasdsa.westeurope.azure.confluent.cloud>",
"stream.kafka.decoder.prop.schema.registry.basic.auth.credentials.source": "USER_INFO",
"<http://stream.kafka.decoder.prop.schema.registry.basic.auth.user.info|stream.kafka.decoder.prop.schema.registry.basic.auth.user.info>": ":",
"stream.kafka.decoder.prop.schema.registry.schema.name": "KsqlDataSourceSchema",
"stream.kafka.decoder.prop.format": "AVRO"
Mayank
Naveen
09/02/2025, 3:37 PM
Rajkumar
09/02/2025, 4:48 PM
Rajkumar
09/02/2025, 4:48 PM
split(PEXP_DEAL_KEY, '|', 1)
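For context on where an expression like this lives: ingestion transforms go under ingestionConfig.transformConfigs in the table config. A sketch only, with dealKeyPrefix as a hypothetical destination column; if the three-argument split isn't accepted on your version, splitPart(input, delimiter, index) is the documented per-index accessor in recent Pinot releases:

"ingestionConfig": {
  "transformConfigs": [
    {
      "columnName": "dealKeyPrefix",
      "transformFunction": "split(PEXP_DEAL_KEY, '|', 1)"
    }
  ]
}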
madhulika
09/04/2025, 5:44 PM
Pratheek Shetty
09/06/2025, 6:43 AM
Running EXPLAIN "query" manually in the UI isn't feasible since we have hundreds of queries.
• After a restart, the servers function normally until the issue reappears.
• My main suspicion is that certain queries are significantly expensive and are triggering these outages.
Request:
If anyone has experience or additional context on how to efficiently identify long-running or expensive queries (without manually running EXPLAIN
for each one), please share your insights.
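Two low-effort angles that don't require per-query EXPLAIN, offered as a sketch rather than a definitive answer: Pinot brokers log per-query response stats (request ID, table, latency), so mining the broker logs for high timeMs entries is one way to surface expensive queries after the fact; and a per-table query timeout bounds the damage any single runaway query can do. The timeout lives in the table config's query section (it appears as null in the table config earlier in this thread); a sketch assuming a 10-second budget:

"query": {
  "timeoutMs": 10000
}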
Tyler Brockmeyer
09/08/2025, 7:47 PM
Naveen
09/08/2025, 8:03 PM
(DATE_TRUNC('DAY', "eventsgroups".servertime)) >= CAST('2025-08-25' AS timestamp)
date_trunc functions?
Kiril Kalchev
09/10/2025, 4:21 PM
When UpsertCompactionTask runs, that column gets truncated to 512 characters, which breaks the application since we're losing data. Am I missing some configuration here, or is this a bug/intentional behavior?
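Worth checking first: 512 is Pinot's default maxLength for STRING dimension fields, and the limit is applied when a segment is generated, so a value that survived in the consuming segment can get truncated when the compaction task rewrites the segment. If that's the cause, the fix is raising maxLength on the field spec in the schema; a sketch with a hypothetical column name and limit:

{
  "name": "long_text_column",
  "dataType": "STRING",
  "maxLength": 65535
}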
Shubham Kumar
09/11/2025, 11:08 AM
Shubham Kumar
09/11/2025, 11:08 AM
Shubham Kumar
09/11/2025, 11:14 AM
We're hitting a java.lang.OutOfMemoryError: Java heap space issue while running a realtime upsert table in Pinot.
Setup details:
• Table type: REALTIME (upsert = FULL mode)
• Primary keys: ~2B (hashFunction = MURMUR3)
• Cluster: 4 servers
• Memory: 128 GB per server
Problem:
Whenever I start the servers, memory usage grows continuously until ~100–110 GB, and then the server eventually hits java.lang.OutOfMemoryError: Java heap space.
Attaching heap memory usage trend, server config, table config, server logs, and GC logs.
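Context on the sizing here: in full upsert mode each server keeps an on-heap map from every primary key to its latest record location, so ~2B keys spread over 4 servers can plausibly account for most of a 100+ GB heap on its own. Depending on the Pinot version, upsertConfig has knobs that bound that map; a sketch only, and treat metadataTTL (which expires old keys by comparison-column value) and enableSnapshot as version-dependent options to verify against the docs:

"upsertConfig": {
  "mode": "FULL",
  "hashFunction": "MURMUR3",
  "enableSnapshot": true,
  "metadataTTL": 86400
}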
Tyler Brockmeyer
09/11/2025, 4:25 PM
coco
09/12/2025, 8:42 AM
mg
09/13/2025, 10:47 PM
2025/09/12 13:10:21.281 ERROR [PinotTableRestletResource] [grizzly-http-server-0] null
java.lang.RuntimeException
at org.apache.pinot.controller.helix.core.PinotTableIdealStateBuilder.getPartitionGroupMetadataList(PinotTableIdealStateBuilder.java:110)
at org.apache.pinot.controller.helix.core.realtime.PinotLLCRealtimeSegmentManager.getNewPartitionGroupMetadataList(PinotLLCRealtimeSegmentManager.java:1174)
at org.apache.pinot.controller.helix.core.realtime.PinotLLCRealtimeSegmentManager.getNewPartitionGroupMetadataList(PinotLLCRealtimeSegmentManager.java:1160)
at org.apache.pinot.controller.helix.core.realtime.PinotLLCRealtimeSegmentManager.setUpNewTable(PinotLLCRealtimeSegmentManager.java:381)
at org.apache.pinot.controller.helix.core.PinotHelixResourceManager.addTable(PinotHelixResourceManager.java:1822)
at org.apache.pinot.controller.api.resources.PinotTableRestletResource.addTable(PinotTableRestletResource.java:255)
[... stack trace continues ...]
2025/09/12 13:10:21.283 INFO [ControllerResponseFilter] [grizzly-http-server-0] Handled request from 127.0.0.1 POST http://localhost:9000/tables, content-type application/json status code 500 Internal Server Error
2025/09/12 13:10:21.283 INFO [PinotTaskManager] [ZkClient-EventThread-128-pinot-zookeeper:2181] Cleaning up task in scheduler for table testTable_REALTIME
To check that the SSL settings were being picked up at all, I deliberately misconfigured them; that produced this timeout error instead:
Caused by: org.apache.pinot.spi.stream.TransientConsumerException: org.apache.kafka.common.errors.TimeoutException: Timeout expired while fetching topic metadata
at org.apache.pinot.plugin.stream.kafka20.KafkaStreamMetadataProvider.fetchPartitionCount(KafkaStreamMetadataProvider.java:76)
Configuration
Certificates are generated by Strimzi KafkaUser and converted to PKCS#12 format:
- user.p12: client keystore with the user certificate and private key
- truststore.p12: truststore with the Kafka cluster CA certificate
- The password is read from the user.password file
Pinot Helm Configuration:
controller:
  extra:
    configs: |-
      pinot.set.instance.id.to.hostname=true
      controller.task.scheduler.enabled=true
      stream.kafka.broker.list=kafka-bootstrap:9093
      stream.kafka.consumer.factory.class.name=org.apache.pinot.plugin.stream.kafka.KafkaConsumerFactory
      security.protocol=SSL
      ssl.truststore.location=/opt/pinot/kafka-cert/truststore.p12
      ssl.truststore.password=changeit
      ssl.truststore.type=PKCS12
      ssl.keystore.location=/opt/pinot/kafka-cert/user.p12
      ssl.keystore.password=[read-from-file]
      ssl.keystore.type=PKCS12
      ssl.key.password=[read-from-file]
      ssl.endpoint.identification.algorithm=
      ...
Table Configuration
{
  "tableName": "testTable",
  "tableType": "REALTIME",
  "segmentsConfig": {
    "timeColumnName": "DaysSinceEpoch",
    "timeType": "DAYS",
    "retentionTimeUnit": "DAYS",
    "retentionTimeValue": "7",
    "segmentPushType": "APPEND",
    "segmentAssignmentStrategy": "BalanceNumSegmentAssignmentStrategy",
    "replication": "1"
  },
  "tenants": {
    "broker": "DefaultTenant",
    "server": "DefaultTenant"
  },
  "tableIndexConfig": {
    "loadMode": "MMAP",
    "streamConfigs": {
      "streamType": "kafka",
      "stream.kafka.consumer.type": "LowLevel",
      "stream.kafka.topic.name": "test-topic",
      "stream.kafka.broker.list": "kafka-bootstrap:9093",
      "stream.kafka.decoder.class.name": "org.apache.pinot.plugin.inputformat.json.JSONMessageDecoder",
      "stream.kafka.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kafka.KafkaConsumerFactory",
      "security.protocol": "SSL",
      "ssl.truststore.location": "/opt/pinot/kafka-cert/truststore.p12",
      "ssl.truststore.password": "changeit",
      "ssl.truststore.type": "PKCS12",
      "ssl.keystore.location": "/opt/pinot/kafka-cert/user.p12",
      "ssl.keystore.password": "[read-from-file]",
      "ssl.keystore.type": "PKCS12",
      "ssl.key.password": "[read-from-file]",
      "ssl.endpoint.identification.algorithm": ""
    }
  }
}
Questions
1. Is Strimzi KafkaUser TLS authentication supported? I tried to follow the documentation examples at (https://docs.pinot.apache.org/manage-data/data-import/pinot-stream-ingestion/import-from-apache-kafka#use-kafka-partition-l[…]evel-consumer-with-ssl)
2. Configuration precedence: Should SSL configuration be in global Pinot config, streamConfigs, or both? I've tried both approaches.
3. Certificate format: Are PKCS#12 keystores the correct format? Strimzi generates PEM certificates that I convert using keytool.
4. Debugging: Are there specific logs or debug flags that could help identify if the SSL handshake is successful but authorization is failing?
What I've Verified:
- The Kafka topic and KafkaUser exist, and the KafkaUser has the correct permissions
- Certificates are properly mounted in the Pinot pods (/opt/pinot/kafka-cert/)
- Schema creation works fine (POST /schemas succeeds)
Any suggestions on proper Strimzi integration or debugging steps would be greatly appreciated!
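One detail that stands out in the table config above: stream.kafka.consumer.factory.class.name is the unversioned org.apache.pinot.plugin.stream.kafka.KafkaConsumerFactory, while the factory classes Pinot ships are versioned (the TimeoutException stack trace above comes from the kafka20 plugin, and kafka30 appears earlier in this thread). A factory class that fails to load could explain the bare RuntimeException from getPartitionGroupMetadataList at table-creation time. A sketch of the streamConfigs lines to try, keeping the SSL properties in streamConfigs rather than the Helm-level controller config, since the controller builds its metadata consumer from the table's streamConfigs:

"stream.kafka.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
"security.protocol": "SSL",
"ssl.truststore.location": "/opt/pinot/kafka-cert/truststore.p12",
"ssl.keystore.location": "/opt/pinot/kafka-cert/user.p12"

On question 3: PKCS#12 keystores are a standard Kafka client format, so the keytool conversion itself should be fine.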
Luis P Fernandes
09/14/2025, 9:06 AM
Aashiq PS
09/15/2025, 3:45 AM
Neeraja Sridharan
09/16/2025, 2:18 AM
We're setting up ingestion-time HLL for our real-time Pinot table, using HLL sketches to improve the performance of DISTINCTCOUNTMV-based backend queries on our multi-value column, i.e. ids.
Schema config:
{
  "name": "hll_unique_ids",
  "dataType": "BYTES"
},
{
  "name": "ids",
  "dataType": "STRING",
  "singleValueField": false
}
Transform config:
{
  "columnName": "hll_unique_ids",
  "transformFunction": "HLL(murmurHash64(ids), 12)"
}
We added murmurHash64 because the plain HLL() transform config was failing, and we wanted a function more compatible with the multi-value column.
But whether we use HLL() alone or HLL() and murmurHash64() chained, we get the error below. We're currently on Pinot 1.0.
error: Invalid TableConfigs: real_time_pinot_table. Invalid transform function 'HLL(murmurHash64(ids), 12)' for column 'hll_unique_ids'
Appreciate any help to understand what might be causing the HLL() transform function to fail.
cc: @Sai Tarun Tadakamalla @ZEBIN KANG @Jessica Stewart
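One alternative worth checking, since HLL() may simply not be a registered scalar transform function in Pinot 1.0: newer releases can build the sketch at ingestion time through ingestionConfig.aggregationConfigs instead of a transform. A purely illustrative sketch; whether distinctCountHLL is supported there on your exact version, and whether it accepts a multi-value input column, both need verifying against the docs:

"ingestionConfig": {
  "aggregationConfigs": [
    {
      "columnName": "hll_unique_ids",
      "aggregationFunction": "distinctCountHLL(ids)"
    }
  ]
}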
Rishika
09/16/2025, 6:52 AM
ZEBIN KANG
09/16/2025, 9:16 PM
⚠️ Enabling Groovy
Allowing executable Groovy in ingestion transformations can be a security vulnerability. If you would like to enable Groovy for ingestion, you can set the following controller config.
controller.disable.ingestion.groovy=false
If not set, Groovy for ingestion transformation is disabled by default.
Do you know if there is a best practice for using this feature? 🙇
cc: @Neeraja Sridharan @Sai Tarun Tadakamalla
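For reference, once controller.disable.ingestion.groovy=false is set on the controller, a Groovy ingestion transform is written inline in the table config using the documented Groovy({script}, arg1, arg2...) shape. A sketch using the docs' example columns (hypothetical for your table):

{
  "columnName": "fullName",
  "transformFunction": "Groovy({firstName + ' ' + lastName}, firstName, lastName)"
}

As for best practice: the usual advice is to prefer built-in scalar functions where one exists, and to keep any Groovy script small and side-effect-free, since it runs inside the ingestion path with the server's privileges.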
Priyank Bagrecha
09/16/2025, 9:50 PM
Trust Okoroego
09/17/2025, 4:52 PM
select
ORDER_ID,
ORDER_NUMBER,
CUSTORDER_ID,
ORDER_VALIDATION_CODE,
POD_CODE,
DELIVERY_FROM_DAT,
DELIVERY_TO_DAT,
CTL_CRE_TS,
CTL_MOD_TS,
ORDER_STATUS_CD,
SAREA_ID,
LAG(ON_HOLD_ORDER_AND_LOCKED_FLAG, 1, 0) OVER (PARTITION BY ORDER_ID ORDER BY CTL_MOD_TS) AS prev_is_active
from
Orders
)
If the default is not set, the correct result is returned, with the last row returning NULL for prev_is_active since there is no row before it. However, setting the default to 0 throws an unrelated timestamp error. Could this be related to NULL handling?
at java.base/java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:396)
at java.base/java.util.concurrent.CompletableFuture.get(CompletableFuture.java:2096)
at org.apache.pinot.query.service.server.QueryServer.submit(QueryServer.java:156)
at org.apache.pinot.common.proto.PinotQueryWorkerGrpc$MethodHandlers.invoke(PinotQueryWorkerGrpc.java:284)
...
Caused by: java.lang.RuntimeException: Caught exception while submitting request: 1473823763000000159, stage: 2
at org.apache.pinot.query.service.server.QueryServer.lambda$submit$1(QueryServer.java:144)
at java.base/java.util.concurrent.CompletableFuture$AsyncRun.run(CompletableFuture.java:1804)
... 3 more
Caused by: java.util.concurrent.ExecutionException: java.lang.RuntimeException: Failed to instantiate WindowFunction for function: LAG
at java.base/java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:396)
at java.base/java.util.concurrent.CompletableFuture.get(CompletableFuture.java:2096)
at org.apache.pinot.query.service.server.QueryServer.lambda$submit$1(QueryServer.java:141)
... 4 more
...
Caused by: java.lang.RuntimeException: Failed to instantiate WindowFunction for function: LAG
at org.apache.pinot.query.runtime.operator.window.WindowFunctionFactory.construnctWindowFunction(WindowFunctionFactory.java:56)
at org.apache.pinot.query.runtime.operator.WindowAggregateOperator.<init>(WindowAggregateOperator.java:145)
at org.apache.pinot.query.runtime.plan.PhysicalPlanVisitor.visitWindow(PhysicalPlanVisitor.java:107)
at org.apache.pinot.query.runtime.plan.PhysicalPlanVisitor.visitWindow(PhysicalPlanVisitor.java:65)
...
Caused by: java.lang.reflect.InvocationTargetException
at jdk.internal.reflect.GeneratedConstructorAccessor151.newInstance(Unknown Source)
at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:480)
...
Caused by: java.lang.UnsupportedOperationException: Cannot convert value from INTEGER to TIMESTAMP
at org.apache.pinot.common.utils.PinotDataType$5.toTimestamp(PinotDataType.java:300)
at org.apache.pinot.common.utils.PinotDataType$10.convert(PinotDataType.java:593)
at org.apache.pinot.common.utils.PinotDataType$10.convert(PinotDataType.java:545)
at org.apache.pinot.query.runtime.operator.window.value.LagValueWindowFunction.<init>(LagValueWindowFunction.java:63)
org.apache.pinot.query.service.dispatch.QueryDispatcher.submit(QueryDispatcher.java:198)
org.apache.pinot.query.service.dispatch.QueryDispatcher.submitAndReduce(QueryDispatcher.java:95)
org.apache.pinot.broker.requesthandler.MultiStageBrokerRequestHandler.handleRequest(MultiStageBrokerRequestHandler.java:219)
org.apache.pinot.broker.requesthandler.BaseBrokerRequestHandler.handleRequest(BaseBrokerRequestHandler.java:133)
Priyank Bagrecha
09/17/2025, 10:03 PM