# troubleshooting
  • Vatsal Agrawal

    08/29/2025, 5:28 AM
    Hi Team, we are facing an issue with MergeRollupTask in our Pinot cluster. After the task runs, the original segments are not deleted, and we end up with both the original and the merged segments in the table. Retention properties are left at their defaults. Any guidance on what we might be missing would be super helpful. I'm adding task, table, and segment details in the thread.
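    A minimal sketch of where the MergeRollupTask settings referenced in this thread live in the table config; the field names follow the task config posted later in this channel, and the values are only illustrative:

    "task": {
      "taskTypeConfigsMap": {
        "MergeRollupTask": {
          "1day.mergeType": "rollup",
          "1day.bucketTimePeriod": "1d",
          "1day.bufferTimePeriod": "1d",
          "1day.maxNumRecordsPerSegment": "1000000"
        }
      }
    }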
  • Deepak Padhi

    08/29/2025, 10:04 AM
    #C011C9JHN7R I am having issues with the star-tree Tableau connector: the additional properties do not work consistently with respect to query options when trying to set the multistage option to true.
  • Deepak Padhi

    08/29/2025, 10:04 AM
    In Tableau Server it fails to sign in, whereas it at least connects in Tableau Desktop.
  • Rajkumar

    08/30/2025, 6:47 PM
    Hey all, I'm very new to Pinot and have been trying to get a realtime table working from Confluent Kafka. Pinot doesn't like something in my config and it times out before the table is created. The ID/API keys do have access to Kafka, so I expect something is wrong with my config below; any reference/pointers would be much appreciated.
  • Rajkumar

    08/30/2025, 6:47 PM
    {
      "tableName": "kafka_test_1",
      "tableType": "REALTIME",
      "tenants": {
        "broker": "DefaultTenant",
        "server": "DefaultTenant",
        "tagOverrideConfig": {}
      },
      "segmentsConfig": {
        "timeColumnName": "time",
        "replication": "1",
        "replicasPerPartition": "1",
        "retentionTimeUnit": null,
        "retentionTimeValue": null,
        "completionConfig": null,
        "crypterClassName": null,
        "peerSegmentDownloadScheme": null,
        "schemaName": "kafka_test"
      },
      "tableIndexConfig": {
        "loadMode": "MMAP",
        "invertedIndexColumns": [],
        "createInvertedIndexDuringSegmentGeneration": false,
        "rangeIndexColumns": [],
        "sortedColumn": [],
        "bloomFilterColumns": [],
        "bloomFilterConfigs": null,
        "noDictionaryColumns": [],
        "onHeapDictionaryColumns": [],
        "varLengthDictionaryColumns": [],
        "enableDefaultStarTree": false,
        "starTreeIndexConfigs": null,
        "enableDynamicStarTreeCreation": false,
        "segmentPartitionConfig": null,
        "columnMinMaxValueGeneratorMode": null,
        "aggregateMetrics": false,
        "nullHandlingEnabled": false,
        "streamConfigs": {
          "streamType": "kafka",
          "stream.kafka.topic.name": "PINOT.TEST",
          "stream.kafka.consumer.type": "lowlevel",
          "stream.kafka.broker.list": "{}",
          "stream.kafka.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kafka30.KafkaConsumerFactory",
          "stream.kafka.security.protocol": "SASL_SSL",
          "stream.kafka.sasl.mechanism": "OAUTHBEARER",
          "stream.kafka.sasl.login.callback.handler.class": "org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginCallbackHandler",
          "stream.kafka.sasl.oauthbearer.token.endpoint.url": "{url}",
          "stream.kafka.sasl.jaas.config": "org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule required clientId='{}' clientSecret='{}' scope='' extension_logicalCluster='{}' extension_identityPoolId='{}';",
          "stream.kafka.ssl.endpoint.identification.algorithm": "https",
          "stream.kafka.consumer.prop.group.id": "{}",
          "stream.kafka.consumer.prop.auto.offset.reset": "earliest",
          "stream.kafka.consumer.prop.request.timeout.ms": "60000",
          "stream.kafka.consumer.prop.metadata.max.age.ms": "60000",
          "stream.kafka.decoder.class.name": "org.apache.pinot.plugin.stream.kafka.KafkaAvroMessageDecoder",
          "stream.kafka.decoder.prop.schema.registry.url": "https://{}.westeurope.azure.confluent.cloud",
          "stream.kafka.decoder.prop.schema.registry.basic.auth.credentials.source": "USER_INFO",
          "stream.kafka.decoder.prop.schema.registry.basic.auth.user.info": "{key}:{secret}"
        }
      },
      "metadata": {},
      "ingestionConfig": {
        "filterConfig": null,
        "transformConfigs": null
      },
      "quota": {
        "storage": null,
        "maxQueriesPerSecond": null
      },
      "task": null,
      "routing": {
        "segmentPrunerTypes": null,
        "instanceSelectorType": null
      },
      "query": {
        "timeoutMs": null
      },
      "fieldConfigList": null,
      "upsertConfig": null,
      "tierConfigs": null
    }
  • Rajkumar

    09/01/2025, 10:53 AM
    Just to give an update, this was resolved with the below config
  • Rajkumar

    09/01/2025, 10:55 AM
    "streamType": "kafka",
    "stream.kafka.topic.name": "asdas",
    "stream.kafka.consumer.type": "lowlevel",
    "stream.kafka.broker.list": "asasds.westeurope.azure.confluent.cloud:9092",
    "stream.kafka.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
    "security.protocol": "SASL_SSL",
    "sasl.mechanism": "PLAIN",
    "sasl.jaas.config": "org.apache.kafka.common.security.plain.PlainLoginModule required username=\"\" password=\"\";",
    "ssl.endpoint.identification.algorithm": "https",
    "auto.offset.reset": "earliest",
    "<http://stream.kafka.consumer.prop.request.timeout.ms|stream.kafka.consumer.prop.request.timeout.ms>": "60000",
    "<http://stream.kafka.consumer.prop.metadata.max.age.ms|stream.kafka.consumer.prop.metadata.max.age.ms>": "60000",
    "stream.kafka.decoder.class.name": "org.apache.pinot.plugin.inputformat.avro.confluent.KafkaConfluentSchemaRegistryAvroMessageDecoder",
    "stream.kafka.decoder.prop.schema.registry.rest.url": "<https://dasdsa.westeurope.azure.confluent.cloud>",
    "stream.kafka.decoder.prop.schema.registry.basic.auth.credentials.source": "USER_INFO",
    "<http://stream.kafka.decoder.prop.schema.registry.basic.auth.user.info|stream.kafka.decoder.prop.schema.registry.basic.auth.user.info>": ":",
    "stream.kafka.decoder.prop.schema.registry.schema.name": "KsqlDataSourceSchema",
    "stream.kafka.decoder.prop.format": "AVRO"
  • Mayank

    09/01/2025, 12:03 PM
    Thanks @Rajkumar for confirming
  • Naveen

    09/02/2025, 3:37 PM
    "task": { "taskTypeConfigsMap": { "MergeRollupTask": { "1hour.mergeType": "rollup", "1hour.bucketTimePeriod": "1h", "1hour.bufferTimePeriod": "3h", "1hour.maxNumRecordsPerSegment": "1000000", "1hour.maxNumRecordsPerTask": "5000000", "1hour.maxNumParallelBuckets": "5", "1day.mergeType": "rollup", "1day.bucketTimePeriod": "1d", "1day.bufferTimePeriod": "1d", "1day.roundBucketTimePeriod": "1d", "1day.maxNumRecordsPerSegment": "1000000", "1day.maxNumRecordsPerTask": "5000000", "metric2.aggregationType": "avg", "metric.aggregationType": "avg", "metric3.aggregationType": "avg", "metric4.aggregationType": "avg", "scores_sc.aggregationType": "avg" } } } based on the docs, I understood avg is not supported during aggregation is my understand is correct or we can do average as well. is my above job is correct or not.
  • Rajkumar

    09/02/2025, 4:48 PM
    Hi All, what's a neat way of extracting fields from a concatenated string? For example, 'Raj|25|Male' should go into three fields: Name, Age, Gender.
  • Rajkumar

    09/02/2025, 4:48 PM
    I tried the below, but it doesn't work:
    split(PEXP_DEAL_KEY, '|', 1)
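    One possible approach, as a sketch: Pinot's regexpExtract scalar function can pull each delimited field out with an explicit capture group, and because '|' is a regex metacharacter it needs escaping, which may also be why a plain split on '|' behaves unexpectedly. The table name below is hypothetical:

    SELECT
      regexpExtract(PEXP_DEAL_KEY, '^([^|]*)\|([^|]*)\|([^|]*)$', 1) AS Name,
      regexpExtract(PEXP_DEAL_KEY, '^([^|]*)\|([^|]*)\|([^|]*)$', 2) AS Age,
      regexpExtract(PEXP_DEAL_KEY, '^([^|]*)\|([^|]*)\|([^|]*)$', 3) AS Gender
    FROM myTable

    The same expressions should also be usable as ingestion transformConfigs if the three fields need to be materialized as columns.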
  • madhulika

    09/04/2025, 5:44 PM
    Hi @Mayank, can I use the balanced segment assignment strategy together with strict replica group routing?
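    For reference, the routing option in question is set in the table config; a minimal sketch of its shape (whether it can be combined with the balanced segment assignment strategy is exactly what this thread is asking, so treat this as the config shape rather than a confirmed combination):

    "routing": {
      "instanceSelectorType": "strictReplicaGroup"
    }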
  • Pratheek Shetty

    09/06/2025, 6:43 AM
    Hello, good morning. Our servers intermittently stop responding and return 502 Gateway Timeout errors. The issue resolves temporarily after a restart, but it recurs once the system is under load. Given that our dataset is extremely large (around 300 billion records across all tables), my suspicion is that one or more expensive queries are causing the system to hang.
    Observations & context:
    • I want to identify which queries are running for a long time or consuming excessive resources.
    • Running EXPLAIN "query" manually in the UI isn't feasible since we have hundreds of queries.
    • After a restart, the servers function normally until the issue reappears.
    • My main doubt is that certain queries are significantly expensive and are triggering these outages.
    Request: If anyone has experience or additional context on how to efficiently identify long-running or expensive queries (without manually running EXPLAIN for each one), please share your insights.
  • Tyler Brockmeyer

    09/08/2025, 7:47 PM
    Hi, I've got a question regarding multiple tenants on Kubernetes deployments of Pinot. Given:
    • the clients of our Pinot cluster need to be able to reach the appropriate broker for the table they are querying from outside the Kubernetes cluster
    • brokers can be rebalanced, and we might have multiple brokers for any one tenant
    • as far as I can tell, the controller will only give you the cluster-local URL of the appropriate broker when asked where to find a table
    How can I set up a single URL for the whole cluster, or a single URL for each tenant (either case is fine), for clients outside our Kubernetes cluster to access the correct broker? I do not see anything detailing a way to allow the controller to advertise the cluster-external broker URL instead of the cluster-local one. If it were able to do this, it would likely be sufficient for our use case.
  • Naveen

    09/08/2025, 8:03 PM
    Hi, to use rollup segments effectively, is there any condition we need to follow, such as filtering on the raw timestamp column directly instead of wrapping it in date_trunc functions like:
    (DATE_TRUNC('DAY', "eventsgroups".servertime)) >= CAST('2025-08-25' AS timestamp)
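    For illustration, the two filter shapes being compared; for a day-aligned boundary they are logically equivalent, and the open question above is whether the second form is required for the rollup segments to be used effectively (table and column names taken from the snippet above):

    -- filter written against a derived value
    SELECT COUNT(*) FROM eventsgroups
    WHERE DATE_TRUNC('DAY', servertime) >= CAST('2025-08-25' AS TIMESTAMP);

    -- filter written against the raw time column
    SELECT COUNT(*) FROM eventsgroups
    WHERE servertime >= CAST('2025-08-25' AS TIMESTAMP);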
  • Kiril Kalchev

    09/10/2025, 4:21 PM
    Hi everyone, we've hit a strange bug. We have a realtime table with upserts and a relatively large primary key, where one of the columns is a string of up to 2048 characters. When an UpsertCompactionTask runs, that column gets truncated to 512 characters, which breaks the application since we're losing data. Am I missing some configuration here, or is this a bug/intentional behavior?
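    A hedged guess worth ruling out: STRING fields in a Pinot schema default to a maxLength of 512 characters, and that limit is applied when segment data is rewritten, so a compaction rewrite could surface exactly this truncation if the schema never declared a larger limit. A fieldSpec sketch (field name hypothetical):

    {
      "name": "long_key_column",
      "dataType": "STRING",
      "maxLength": 2048
    }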
  • Shubham Kumar

    09/11/2025, 11:08 AM
    table config
    table config
  • Shubham Kumar

    09/11/2025, 11:08 AM
    table config
    Untitled
  • Shubham Kumar

    09/11/2025, 11:14 AM
    Hi Team, I am facing a java.lang.OutOfMemoryError: Java heap space issue while running a realtime upsert table in Pinot.
    Setup details:
    • Table type: REALTIME (upsert = FULL mode)
    • Primary keys: ~2B (hashFunction = MURMUR3)
    • Cluster: 4 servers
    • Memory: 128 GB per server
    Problem: Whenever I start the servers, memory usage grows continuously until ~100–110 GB, and then the server eventually hits java.lang.OutOfMemoryError: Java heap space.
    Attaching the heap memory use trend, server config, table config, server logs and GC logs.
    Attachments: gc.log, server logs, table config.rtf, server config.rtf
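    Back-of-envelope heap math for the setup described above, assuming the upsert primary-key metadata map is kept on heap and a rough per-entry cost (not measured figures):

    2,000,000,000 keys / 4 servers ≈ 500,000,000 keys per server
    × roughly 100 bytes per entry (hashed key object + record location + map overhead)
    ≈ 50 GB of heap per server for the upsert metadata alone, before query and indexing memory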
  • Tyler Brockmeyer

    09/11/2025, 4:25 PM
    We're using Pinot in Kubernetes, and we would like to start using multiple tenants in order to support physical data segregation. We need to not only physically separate the data, but also encrypt each tenant's data at-rest using a different encryption key for their bound volume. The feature that would enable all of this is the ability to set specific servers to be assigned to specific tenants. I have not seen that this is possible in any configuration. Does anyone know of a way to do it?
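    For what it's worth, a sketch of the usual shape of tenant isolation: server instances carry tenant-specific tags (by convention {tenant}_OFFLINE / {tenant}_REALTIME), and the table config points at that tenant, so only the tagged servers host its segments. The tenant name below is hypothetical, and the exact way the tags are applied (controller API or admin tooling) may vary by version:

    "tenants": {
      "broker": "tenantA",
      "server": "tenantA"
    }

    If that holds, each tenant's server pods could mount volumes encrypted with their own keys.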
  • coco

    09/12/2025, 8:42 AM
    Hi Pinot team. I'm receiving a "Message loss detected" error on a server in a Pinot 1.2 cluster. Does this error mean that the segment failed to start consuming from startOffset and instead started consuming from batchFirstOffset? Does that mean data loss occurred between startOffset 769410318 and batchFirstOffset 769525930? However, when I query the data for that offset range in Pinot, nothing is missing. What does this error log mean?
    2025/09/12 15:09:35.996 ERROR [debug_tiara_core__1__1__20250912T0608Z] org.apache.pinot.core.data.manager.realtime.RealtimeSegmentDataManager_debug_tiara_core__1__1__20250912T0608Z reportDataLoss:958 Message loss detected in stream partition: 1 for table: debug_tiara_core_REALTIME startOffset: 769410318 batchFirstOffset: 769525930
    select $segmentName, * from debug_tiara_core where __metadata$partition='1' and __metadata$offset >= '769410318' and __metadata$offset <= '769525930'
    -- Data exists, no loss.
    debug_tiara_core__1__1__20250912T0608Z segment metadata:
    {
      "segment.realtime.startOffset": "769410318",
      "segment.realtime.endOffset": "769571100",
      "segment.creation.time": "1757657338493",
      "segment.start.time": "1757635200000",
      "segment.end.time": "1757635200000",
      "segment.total.docs": "150,000"
    }
  • mg

    09/13/2025, 10:47 PM
    Pinot REALTIME table creation fails with Strimzi Kafka TLS authentication. Env: Pinot Helm chart 0.3.4, Strimzi Kafka 3.7, both deployed on the same GKE cluster. We are trying to create a REALTIME table in Pinot that connects to a Kafka cluster secured with TLS, using certificates generated by a Strimzi KafkaUser. The KafkaUser has full ACL rights (all operations on all resources). The issue appears during table creation, when Pinot tries to fetch partition metadata from Kafka. I think this suggests that the SSL connection is established but something fails during Kafka topic introspection. Note: when I run the same configuration against a plain (non-TLS) Kafka connection, creation works fine.
    Error details (controller logs, main exception):
    2025/09/12 13:10:21.281 ERROR [PinotTableRestletResource] [grizzly-http-server-0] null
    java.lang.RuntimeException
            at org.apache.pinot.controller.helix.core.PinotTableIdealStateBuilder.getPartitionGroupMetadataList(PinotTableIdealStateBuilder.java:110)
            at org.apache.pinot.controller.helix.core.realtime.PinotLLCRealtimeSegmentManager.getNewPartitionGroupMetadataList(PinotLLCRealtimeSegmentManager.java:1174)
            at org.apache.pinot.controller.helix.core.realtime.PinotLLCRealtimeSegmentManager.getNewPartitionGroupMetadataList(PinotLLCRealtimeSegmentManager.java:1160)
            at org.apache.pinot.controller.helix.core.realtime.PinotLLCRealtimeSegmentManager.setUpNewTable(PinotLLCRealtimeSegmentManager.java:381)
            at org.apache.pinot.controller.helix.core.PinotHelixResourceManager.addTable(PinotHelixResourceManager.java:1822)
            at org.apache.pinot.controller.api.resources.PinotTableRestletResource.addTable(PinotTableRestletResource.java:255)
            [... stack trace continues ...]
    
    2025/09/12 13:10:21.283 INFO [ControllerResponseFilter] [grizzly-http-server-0] Handled request from 127.0.0.1 POST http://localhost:9000/tables, content-type application/json status code 500 Internal Server Error
    
    2025/09/12 13:10:21.283 INFO [PinotTaskManager] [ZkClient-EventThread-128-pinot-zookeeper:2181] Cleaning up task in scheduler for table testTable_REALTIME
    I tried to misconfigure the SSL configuration, then I got this timeout error:
    Caused by: org.apache.pinot.spi.stream.TransientConsumerException: org.apache.kafka.common.errors.TimeoutException: Timeout expired while fetching topic metadata
            at org.apache.pinot.plugin.stream.kafka20.KafkaStreamMetadataProvider.fetchPartitionCount(KafkaStreamMetadataProvider.java:76)
    Configuration: certificates are generated by the Strimzi KafkaUser and converted to PKCS#12 format:
    - user.p12: client keystore with the user certificate and private key
    - truststore.p12: truststore with the Kafka cluster CA certificate
    - the password is read from the user.password file
    Pinot Helm configuration:
    controller:
      extra:
        configs: |-
          pinot.set.instance.id.to.hostname=true
          controller.task.scheduler.enabled=true
          stream.kafka.broker.list=kafka-bootstrap:9093
          stream.kafka.consumer.factory.class.name=org.apache.pinot.plugin.stream.kafka.KafkaConsumerFactory
          security.protocol=SSL
          ssl.truststore.location=/opt/pinot/kafka-cert/truststore.p12
          ssl.truststore.password=changeit
          ssl.truststore.type=PKCS12
          ssl.keystore.location=/opt/pinot/kafka-cert/user.p12
          ssl.keystore.password=[read-from-file]
          ssl.keystore.type=PKCS12
          ssl.key.password=[read-from-file]
          ssl.endpoint.identification.algorithm=
    ...
    Table Configuration
    {
      "tableName": "testTable",
      "tableType": "REALTIME",
      "segmentsConfig": {
        "timeColumnName": "DaysSinceEpoch",
        "timeType": "DAYS",
        "retentionTimeUnit": "DAYS",
        "retentionTimeValue": "7",
        "segmentPushType": "APPEND",
        "segmentAssignmentStrategy": "BalanceNumSegmentAssignmentStrategy",
        "replication": "1"
      },
      "tenants": {
        "broker": "DefaultTenant",
        "server": "DefaultTenant"
      },
      "tableIndexConfig": {
        "loadMode": "MMAP",
        "streamConfigs": {
          "streamType": "kafka",
          "stream.kafka.consumer.type": "LowLevel",
          "stream.kafka.topic.name": "test-topic",
          "stream.kafka.broker.list": "kafka-bootstrap:9093",
          "stream.kafka.decoder.class.name": "org.apache.pinot.plugin.inputformat.json.JSONMessageDecoder",
          "stream.kafka.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kafka.KafkaConsumerFactory",
          "security.protocol": "SSL",
          "ssl.truststore.location": "/opt/pinot/kafka-cert/truststore.p12",
          "ssl.truststore.password": "changeit",
          "ssl.truststore.type": "PKCS12",
          "ssl.keystore.location": "/opt/pinot/kafka-cert/user.p12",
          "ssl.keystore.password": "[read-from-file]",
          "ssl.keystore.type": "PKCS12",
          "ssl.key.password": "[read-from-file]",
          "ssl.endpoint.identification.algorithm": ""
        }
      }
    }
    Questions:
    1. Is Strimzi KafkaUser TLS authentication supported? I tried to follow the documentation examples at https://docs.pinot.apache.org/manage-data/data-import/pinot-stream-ingestion/import-from-apache-kafka#use-kafka-partition-l[…]evel-consumer-with-ssl
    2. Configuration precedence: should the SSL configuration be in the global Pinot config, in streamConfigs, or both? I've tried both approaches.
    3. Certificate format: are PKCS#12 keystores the correct format? Strimzi generates PEM certificates that I convert using keytool.
    4. Debugging: are there specific logs or debug flags that could help identify whether the SSL handshake succeeds but authorization fails?
    What I've verified:
    - the Kafka topic and KafkaUser exist and have the correct permissions
    - certificates are properly mounted in the Pinot pods (/opt/pinot/kafka-cert/)
    - schema creation works fine (POST /schemas succeeds)
    Any suggestions on proper Strimzi integration or debugging steps would be greatly appreciated!
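    One observation from the configs in this channel, offered as something to double-check rather than a confirmed fix: the working Confluent example earlier uses a versioned consumer factory class (…kafka20.KafkaConsumerFactory), whereas this setup references org.apache.pinot.plugin.stream.kafka.KafkaConsumerFactory in both the Helm config and the table config. A streamConfigs sketch with the versioned factory and the same certificate paths:

    "stream.kafka.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
    "security.protocol": "SSL",
    "ssl.truststore.location": "/opt/pinot/kafka-cert/truststore.p12",
    "ssl.truststore.type": "PKCS12",
    "ssl.keystore.location": "/opt/pinot/kafka-cert/user.p12",
    "ssl.keystore.type": "PKCS12"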
  • Luis P Fernandes

    09/14/2025, 9:06 AM
    We are suffering from timeout issues with ZK sessions, leading to segments constantly being marked as UPDATING. We had an IP issue with one of our servers, and once that was resolved it triggered a large number of segment reloads; after that we started to experience timeout issues with ZK sessions.
    Pinot Server Logs:
    2025/09/13 19:32:21.866 INFO [ZkClient] [Start a Pinot [SERVER]-EventThread] zkclient 3, zookeeper state changed ( Expired )
    2025/09/13 19:32:21.866 WARN [ClientCnxn] [Start a Pinot [SERVER]-SendThread(100.100.80.80:2191)] Session 0x10008afb572019b for server aiops-5g-pinot-test-zookeeper1/100.100.80.80:2191, Closing socket connection. Attempting reconnect except it is a SessionExpiredException or SessionTimeoutException.
    org.apache.zookeeper.ClientCnxn$SessionExpiredException: Unable to reconnect to ZooKeeper service, session 0x10008afb572019b has expired
            at org.apache.zookeeper.ClientCnxn$SendThread.onConnected(ClientCnxn.java:1439) ~[pinot-all-1.3.0-jar-with-dependencies.jar:1.3.0-c0023da298126af6a01b802a04b66da34ba16134]
            at org.apache.zookeeper.ClientCnxnSocket.readConnectResult(ClientCnxnSocket.java:150) ~[pinot-all-1.3.0-jar-with-dependencies.jar:1.3.0-c0023da298126af6a01b802a04b66da34ba16134]
            at org.apache.zookeeper.ClientCnxnSocketNIO.doIO(ClientCnxnSocketNIO.java:86) ~[pinot-all-1.3.0-jar-with-dependencies.jar:1.3.0-c0023da298126af6a01b802a04b66da34ba16134]
            at org.apache.zookeeper.ClientCnxnSocketNIO.doTransport(ClientCnxnSocketNIO.java:350) ~[pinot-all-1.3.0-jar-with-dependencies.jar:1.3.0-c0023da298126af6a01b802a04b66da34ba16134]
            at org.apache.zookeeper.ClientCnxn$SendThread.run(ClientCnxn.java:1291) [pinot-all-1.3.0-jar-with-dependencies.jar:1.3.0-c0023da298126af6a01b802a04b66da34ba16134]
    2025/09/13 19:32:21.867 INFO [ZooKeeper] [Start a Pinot [SERVER]-EventThread] Initiating client connection, connectString=100.100.80.80:2191 sessionTimeout=30000 watcher=org.apache.helix.zookeeper.impl.client.ZkClient@529eba37
    2025/09/13 19:32:21.867 INFO [ClientCnxnSocket] [Start a Pinot [SERVER]-EventThread] jute.maxbuffer value is 40000000 Bytes
    ZK Logs:
    Sep 14 12:26:33 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:26:33,971 [myid:] - INFO [Snapshot Thread:o.a.z.s.ZooKeeperServer@589] - Snapshot taken in 1874 ms
    Sep 14 12:26:59 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:26:59,329 [myid:] - INFO [SessionTracker:o.a.z.s.ZooKeeperServer@730] - Expiring session 0x100002eb7b20024, timeout of 30000ms exceeded
    Sep 14 12:27:01 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:27:01,329 [myid:] - INFO [SessionTracker:o.a.z.s.ZooKeeperServer@730] - Expiring session 0x100002eb7b20036, timeout of 30000ms exceeded
    Sep 14 12:27:04 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:27:04,553 [myid:] - INFO [NIOWorkerThread-4:o.a.z.s.ZooKeeperServer@1177] - Invalid session 0x100002eb7b20024 for client /100.100.25.106:45014, probably expired
    Sep 14 12:27:05 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:27:05,332 [myid:] - INFO [NIOWorkerThread-10:o.a.z.s.NIOServerCnxn@342] - Unable to read additional data from client, it probably closed the socket: address = /100.100.100.131:38872, session = 0x100002eb7b20039
    Sep 14 12:27:23 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:27:23,330 [myid:] - INFO [SessionTracker:o.a.z.s.ZooKeeperServer@730] - Expiring session 0x100002eb7b20039, timeout of 30000ms exceeded
    Sep 14 12:27:28 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:27:28,859 [myid:] - INFO [NIOWorkerThread-1:o.a.z.s.NIOServerCnxn@342] - Unable to read additional data from client, it probably closed the socket: address = /100.100.5.153:39416, session = 0x100002eb7b20031
    Sep 14 12:27:31 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:27:31,516 [myid:] - INFO [NIOWorkerThread-7:o.a.z.s.NIOServerCnxn@342] - Unable to read additional data from client, it probably closed the socket: address = /100.100.100.131:41740, session = 0x0
    Sep 14 12:27:53 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:27:53,329 [myid:] - INFO [SessionTracker:o.a.z.s.ZooKeeperServer@730] - Expiring session 0x100002eb7b20031, timeout of 30000ms exceeded
    Sep 14 12:28:14 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:28:14,494 [myid:] - INFO [NIOWorkerThread-3:o.a.z.s.NIOServerCnxn@342] - Unable to read additional data from client, it probably closed the socket: address = /100.100.105.251:34936, session = 0x0
    Sep 14 12:28:20 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:28:20,024 [myid:] - INFO [SyncThread:0:o.a.z.s.p.FileTxnLog@291] - Creating new log file: log.c9d9be7
    Sep 14 12:28:20 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:28:20,025 [myid:] - INFO [Snapshot Thread:o.a.z.s.p.FileTxnSnapLog@480] - Snapshotting: 0xc9d9b19 to /srv/zookeeper/data/version-2/snapshot.c9d9b19
    Sep 14 12:28:21 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:28:21,992 [myid:] - INFO [Snapshot Thread:o.a.z.s.ZooKeeperServer@589] - Snapshot taken in 1968 ms
    Sep 14 12:28:38 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:28:38,721 [myid:] - INFO [Snapshot Thread:o.a.z.s.p.FileTxnSnapLog@480] - Snapshotting: 0xc9e9a9b to /srv/zookeeper/data/version-2/snapshot.c9e9a9b
    Sep 14 12:28:38 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:28:38,722 [myid:] - INFO [SyncThread:0:o.a.z.s.p.FileTxnLog@291] - Creating new log file: log.c9e9a9d
    Sep 14 12:28:40 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:28:40,437 [myid:] - INFO [Snapshot Thread:o.a.z.s.ZooKeeperServer@589] - Snapshot taken in 1716 ms
    Sep 14 12:29:10 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:29:10,961 [myid:] - INFO [NIOWorkerThread-5:o.a.z.s.NIOServerCnxn@342] - Unable to read additional data from client, it probably closed the socket: address = /100.100.123.106:54930, session = 0x100002eb7b20026
    Sep 14 12:29:16 aiops-5g-pinot-test-zookeeper1 zkServer.sh[8056]: 2025-09-14 12:29:16,001 [myid:] - INFO [NIOWorkerThread-6:o.a.z.s.NIOServerCnxn@342] - Unable to read additional data from client, it probably closed the socket: address = /100.100.5.153:37504, session = 0x0
    We have increased -Djute.maxbuffer to 4 MB but we still cannot bring the cluster back into operation. Is there an option to increase the session timeout? All the flags we tried (-zookeeper.connection.timeout, zk.session.timeout.ms, helix.zookeeper.session.timeout) don't seem to work. Any suggestions would be welcome, thanks.
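    On the session-timeout question above, one thing that can silently cap whatever the client asks for is the ZooKeeper server itself: the negotiated session timeout is clamped to the server's minSessionTimeout/maxSessionTimeout (which default to 2x and 20x tickTime), so raising only client-side flags may have no visible effect. A zoo.cfg sketch with illustrative values:

    tickTime=2000
    # allow clients to negotiate session timeouts up to 120s instead of the default 40s cap (20 x tickTime)
    maxSessionTimeout=120000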
  • Aashiq PS

    09/15/2025, 3:45 AM
    Hi, I have created a realtime table that contains 134 columns. Initially the Kafka topic had 1 partition and segment sizes varied from 10 MB to 5 GB for document counts between 50 and 5000; we are facing issues while querying and it takes huge resources on the servers. We have now increased the Kafka partitions to 3 and segment sizes are around 50 MB to 500 MB, but the document count per segment is still between 50 and 2000. Can anybody suggest a solution? Also, the segment flush is still based on the 1h window and not exactly 200 MB per segment.
    Untitled.json
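    On the flush behaviour mentioned above: as far as I understand, the realtime flush thresholds in streamConfigs decide when a consuming segment is committed, and the size-based threshold only takes effect when the row threshold is explicitly set to 0, otherwise the time/row thresholds win. A sketch with illustrative values:

    "realtime.segment.flush.threshold.rows": "0",
    "realtime.segment.flush.threshold.time": "1h",
    "realtime.segment.flush.threshold.segment.size": "200M"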
  • Neeraja Sridharan

    09/16/2025, 2:18 AM
    Hey 👋 We're trying to set up ingestion-time HLL for our real-time Pinot table using an HLL sketch, to improve the performance of DISTINCTCOUNTMV-based backend queries on our multi-value column ids.
    Schema config:
    {
      "name": "hll_unique_ids",
      "dataType": "BYTES"
    },
    {
      "name": "ids",
      "dataType": "STRING",
      "singleValueField": false
    }
    Transform config:
    {
      "columnName": "hll_unique_ids",
      "transformFunction": "HLL(murmurHash64(ids), 12)"
    }
    We added murmurHash64 because the HLL() transform config was failing and we wanted to use a function more compatible with the multi-value column. But whether we use HLL() alone or HLL() and murmurHash64() chained, we get the below error. We're currently on Pinot 1.0.
    error: Invalid TableConfigs: real_time_pinot_table. Invalid transform function 'HLL(murmurHash64(ids), 12)' for column 'hll_unique_ids'
    Appreciate any help to understand what might be causing the HLL() transform function to fail. cc: @Sai Tarun Tadakamalla @ZEBIN KANG @Jessica Stewart
  • Rishika

    09/16/2025, 6:52 AM
    Has anyone tried to consume data from Snowflake tables? Last time I checked, the connector was not available. Any ideas?
  • ZEBIN KANG

    09/16/2025, 9:16 PM
    Hey team, we noticed that the official Pinot docs mention this:
    ⚠️ Enabling Groovy
    Allowing executable Groovy in ingestion transformation can be a security vulnerability. If you would like to enable Groovy for ingestion, you can set the following controller config.
    controller.disable.ingestion.groovy=false
    If not set, Groovy for ingestion transformation is disabled by default.
    Do you know if there is a best practice for using this feature? 🙇 cc: @Neeraja Sridharan @Sai Tarun Tadakamalla
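    For reference, a minimal sketch of what an ingestion-time Groovy transform looks like once the controller flag quoted above is set to false; the column names are hypothetical:

    {
      "columnName": "fullName",
      "transformFunction": "Groovy({firstName + ' ' + lastName}, firstName, lastName)"
    }

    Since the script runs with the ingestion process's privileges, the usual practice is to keep it to simple expressions and treat enabling Groovy as a deliberate, reviewed config change.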
  • Priyank Bagrecha

    09/16/2025, 9:50 PM
    is it possible to check the status of a standalone batch ingestion task? wondering if i should use standalone or spark for batch ingestion. i am planning to have 2 tables with data being loaded to one table once daily, and another table once every hour. query rps is low and is mostly for powering dashboards. i want to track ingestion after triggering it so i don't trigger ingestion for the next hour right away in my poc and overwhelm controller.
  • Trust Okoroego

    09/17/2025, 4:52 PM
    Possible bug in Pinot LAG window function. (1.2.0)
    select
    	  ORDER_ID,
    	  ORDER_NUMBER,
    	  CUSTORDER_ID,
    	  ORDER_VALIDATION_CODE,
    	  POD_CODE,
    	  DELIVERY_FROM_DAT,
    	  DELIVERY_TO_DAT,
    	  CTL_CRE_TS,
    	  CTL_MOD_TS,
    	  ORDER_STATUS_CD,
    	  SAREA_ID,
    	    LAG(ON_HOLD_ORDER_AND_LOCKED_FLAG, 1, 0) OVER (PARTITION BY ORDER_ID ORDER BY CTL_MOD_TS) AS prev_is_active
    		from
    		Orders
    If the default is not set, the correct result is returned: the last row returns NULL for prev_is_active since there is no row before it. However, setting a default of 0 throws an unrelated timestamp error. Could this be related to NULL handling?
    at java.base/java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:396)
    	at java.base/java.util.concurrent.CompletableFuture.get(CompletableFuture.java:2096)
    	at org.apache.pinot.query.service.server.QueryServer.submit(QueryServer.java:156)
    	at org.apache.pinot.common.proto.PinotQueryWorkerGrpc$MethodHandlers.invoke(PinotQueryWorkerGrpc.java:284)
    ...
    Caused by: java.lang.RuntimeException: Caught exception while submitting request: 1473823763000000159, stage: 2
    	at org.apache.pinot.query.service.server.QueryServer.lambda$submit$1(QueryServer.java:144)
    	at java.base/java.util.concurrent.CompletableFuture$AsyncRun.run(CompletableFuture.java:1804)
    	... 3 more
    Caused by: java.util.concurrent.ExecutionException: java.lang.RuntimeException: Failed to instantiate WindowFunction for function: LAG
    	at java.base/java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:396)
    	at java.base/java.util.concurrent.CompletableFuture.get(CompletableFuture.java:2096)
    	at org.apache.pinot.query.service.server.QueryServer.lambda$submit$1(QueryServer.java:141)
    	... 4 more
    ...
    Caused by: java.lang.RuntimeException: Failed to instantiate WindowFunction for function: LAG
    	at org.apache.pinot.query.runtime.operator.window.WindowFunctionFactory.construnctWindowFunction(WindowFunctionFactory.java:56)
    	at org.apache.pinot.query.runtime.operator.WindowAggregateOperator.<init>(WindowAggregateOperator.java:145)
    	at org.apache.pinot.query.runtime.plan.PhysicalPlanVisitor.visitWindow(PhysicalPlanVisitor.java:107)
    	at org.apache.pinot.query.runtime.plan.PhysicalPlanVisitor.visitWindow(PhysicalPlanVisitor.java:65)
    ...
    Caused by: java.lang.reflect.InvocationTargetException
    	at jdk.internal.reflect.GeneratedConstructorAccessor151.newInstance(Unknown Source)
    	at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
    	at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499)
    	at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:480)
    ...
    Caused by: java.lang.UnsupportedOperationException: Cannot convert value from INTEGER to TIMESTAMP
    	at org.apache.pinot.common.utils.PinotDataType$5.toTimestamp(PinotDataType.java:300)
    	at org.apache.pinot.common.utils.PinotDataType$10.convert(PinotDataType.java:593)
    	at org.apache.pinot.common.utils.PinotDataType$10.convert(PinotDataType.java:545)
    	at org.apache.pinot.query.runtime.operator.window.value.LagValueWindowFunction.<init>(LagValueWindowFunction.java:63)
    org.apache.pinot.query.service.dispatch.QueryDispatcher.submit(QueryDispatcher.java:198)
    org.apache.pinot.query.service.dispatch.QueryDispatcher.submitAndReduce(QueryDispatcher.java:95)
    org.apache.pinot.broker.requesthandler.MultiStageBrokerRequestHandler.handleRequest(MultiStageBrokerRequestHandler.java:219)
    org.apache.pinot.broker.requesthandler.BaseBrokerRequestHandler.handleRequest(BaseBrokerRequestHandler.java:133)
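    A possible workaround while this is looked into (a sketch, not a confirmed fix): drop the third LAG argument and supply the fallback with COALESCE outside the window function, so no default-value type conversion happens when the window function is instantiated:

    COALESCE(
      LAG(ON_HOLD_ORDER_AND_LOCKED_FLAG, 1) OVER (PARTITION BY ORDER_ID ORDER BY CTL_MOD_TS),
      0
    ) AS prev_is_active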
  • Priyank Bagrecha

    09/17/2025, 10:03 PM
    Will segments be built per input file for Spark-based batch ingestion, or does the data get repartitioned to ensure segments are not tiny?