Diogo Baeder
11/18/2021, 1:31 PMPriyank Bagrecha
11/18/2021, 6:53 PMAli Atıl
11/18/2021, 9:51 PMMap
11/19/2021, 10:41 PM2021/11/19 22:36:05.496 INFO [CurrentStateComputationStage] [HelixController-pipeline-task-pinot-prod-(aa26cf97_TASK)] Event aa26cf97_TASK : Ignore a pending message ee7f9ef0-1de9-4737-b0b4-db4a4e1b9073 for a non-exist resource table0_REALTIME and partition table0__0__0__20211119T2150Z
Mahesh babu
11/22/2021, 12:15 PMMahesh babu
11/22/2021, 2:58 PMPriyank Bagrecha
11/22/2021, 7:30 PM2021/11/20 00:18:41.296 ERROR [HelixStateTransitionHandler] [HelixTaskExecutor-message_handle_thread] Exception while executing a state transition task km_mp_play_startree__103__0__20211120T0018Z
java.lang.reflect.InvocationTargetException: null
at jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:?]
at jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:?]
at jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:?]
at java.lang.reflect.Method.invoke(Method.java:566) ~[?:?]
at org.apache.helix.messaging.handling.HelixStateTransitionHandler.invoke(HelixStateTransitionHandler.java:404) ~[pinot-all-0.9.0-jar-with-dependencies.jar:0.9.0-cf8b84e8b0d6ab62374048de586ce7da21132906]
at org.apache.helix.messaging.handling.HelixStateTransitionHandler.handleMessage(HelixStateTransitionHandler.java:331) [pinot-all-0.9.0-jar-with-dependencies.jar:0.9.0-cf8b84e8b0d6ab62374048de586ce7da21132906]
at org.apache.helix.messaging.handling.HelixTask.call(HelixTask.java:97) [pinot-all-0.9.0-jar-with-dependencies.jar:0.9.0-cf8b84e8b0d6ab62374048de586ce7da21132906]
at org.apache.helix.messaging.handling.HelixTask.call(HelixTask.java:49) [pinot-all-0.9.0-jar-with-dependencies.jar:0.9.0-cf8b84e8b0d6ab62374048de586ce7da21132906]
at java.util.concurrent.FutureTask.run(FutureTask.java:264) [?:?]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) [?:?]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) [?:?]
at java.lang.Thread.run(Thread.java:829) [?:?]
Caused by: java.lang.OutOfMemoryError: Direct buffer memory
at java.nio.Bits.reserveMemory(Bits.java:175) ~[?:?]
at java.nio.DirectByteBuffer.<init>(DirectByteBuffer.java:118) ~[?:?]
at java.nio.ByteBuffer.allocateDirect(ByteBuffer.java:317) ~[?:?]
at org.apache.pinot.segment.spi.memory.PinotByteBuffer.allocateDirect(PinotByteBuffer.java:38) ~[pinot-all-0.9.0-jar-with-dependencies.jar:0.9.0-cf8b84e8b0d6ab62374048de586ce7da21132906]
at org.apache.pinot.segment.spi.memory.PinotDataBuffer.allocateDirect(PinotDataBuffer.java:115) ~[pinot-all-0.9.0-jar-with-dependencies.jar:0.9.0-cf8b84e8b0d6ab62374048de586ce7da21132906]
at org.apache.pinot.segment.local.io.writer.impl.DirectMemoryManager.allocateInternal(DirectMemoryManager.java:53) ~[pinot-all-0.9.0-jar-with-dependencies.jar:0.9.0-cf8b84e8b0d6ab62374048de586ce7da21132906]
at org.apache.pinot.segment.local.io.readerwriter.RealtimeIndexOffHeapMemoryManager.allocate(RealtimeIndexOffHeapMemoryManager.java:80) ~[pinot-all-0.9.0-jar-with-dependencies.jar:0.9.0-cf8b84e8b0d6ab62374048de586ce7da21132906]
at org.apache.pinot.segment.local.realtime.impl.forward.FixedByteSVMutableForwardIndex.addBuffer(FixedByteSVMutableForwardIndex.java:208) ~[pinot-all-0.9.0-jar-with-dependencies.jar:0.9.0-cf8b84e8b0d6ab62374048de586ce7da21132906]
at org.apache.pinot.segment.local.realtime.impl.forward.FixedByteSVMutableForwardIndex.<init>(FixedByteSVMutableForwardIndex.java:77) ~[pinot-all-0.9.0-jar-with-dependencies.jar:0.9.0-cf8b84e8b0d6ab62374048de586ce7da21132906]
at org.apache.pinot.segment.local.indexsegment.mutable.MutableSegmentImpl.<init>(MutableSegmentImpl.java:308) ~[pinot-all-0.9.0-jar-with-dependencies.jar:0.9.0-cf8b84e8b0d6ab62374048de586ce7da21132906]
at org.apache.pinot.core.data.manager.realtime.LLRealtimeSegmentDataManager.<init>(LLRealtimeSegmentDataManager.java:1364) ~[pinot-all-0.9.0-jar-with-dependencies.jar:0.9.0-cf8b84e8b0d6ab62374048de586ce7da21132906]
at org.apache.pinot.core.data.manager.realtime.RealtimeTableDataManager.addSegment(RealtimeTableDataManager.java:344) ~[pinot-all-0.9.0-jar-with-dependencies.jar:0.9.0-cf8b84e8b0d6ab62374048de586ce7da21132906]
at org.apache.pinot.server.starter.helix.HelixInstanceDataManager.addRealtimeSegment(HelixInstanceDataManager.java:162) ~[pinot-all-0.9.0-jar-with-dependencies.jar:0.9.0-cf8b84e8b0d6ab62374048de586ce7da21132906]
at org.apache.pinot.server.starter.helix.SegmentOnlineOfflineStateModelFactory$SegmentOnlineOfflineStateModel.onBecomeOnlineFromOffline(SegmentOnlineOfflineStateModelFactory.java:164) ~[pinot-all-0.9.0-jar-with-dependencies.jar:0.9.0-cf8b84e8b0d6ab62374048de586ce7da21132906]
at org.apache.pinot.server.starter.helix.SegmentOnlineOfflineStateModelFactory$SegmentOnlineOfflineStateModel.onBecomeConsumingFromOffline(SegmentOnlineOfflineStateModelFactory.java:86) ~[pinot-all-0.9.0-jar-with-dependencies.jar:0.9.0-cf8b84e8b0d6ab62374048de586ce7da21132906]
... 12 more
I have tried increasing the heap size (currently 16G) and I am still running into this issue. I am using 5 servers to consume from a topic with 128 partitions, with an event rate of about 7M events per minute. I see 26 segments on 3 servers and 25 on 2 servers in Bad state.Yeongju Kang
11/23/2021, 8:06 AMYeongju Kang
11/23/2021, 10:31 AMAli Atıl
11/23/2021, 2:12 PMDeepak Mishra
11/23/2021, 6:57 PMMahesh babu
11/24/2021, 5:17 AMAyush Kumar Jha
11/24/2021, 6:20 PMjava.lang.IllegalStateException: Unable to extract out the relative path for input file file path "file path"
in 0.8.0 and 0.9.0 but it is working fine in 0.7.1Vibhor Jain
11/25/2021, 6:18 AMMahesh babu
11/25/2021, 11:33 AMAli Atıl
11/25/2021, 2:39 PMPrashanth Rao
11/26/2021, 10:58 AM[Consumer clientId=consumer-2, groupId=event_template_mapping_REALTIME_1627646764788_0] Group coordinator <*> (id: 795314267 rack: null) is unavailable or invalid, will attempt rediscovery
[Consumer clientId=consumer-2, groupId=event_template_mapping_REALTIME_1627646764788_0] Discovered group coordinator <*> (id: 795314267 rack: null)
[Consumer clientId=consumer-2, groupId=event_template_mapping_REALTIME_1627646764788_0] (Re-)joining group
And finally, after 6-7 hours, I saw this message, which basically didn't fetch any partitions.
[Consumer clientId=consumer-4, groupId=event_template_mapping_REALTIME_1627646764788_0] Successfully joined group with generation 6
[Consumer clientId=consumer-4, groupId=event_template_mapping_REALTIME_1627646764788_0] Setting newly assigned partitions []
Map
11/29/2021, 5:23 PMFULL
upsert
mode, we notice that the number of rows returned for the same query varies over time, but it is supposed to remain consistent.
For example, there are 1000 unique values keyed on column A
, which we use as the primary key for the pinot table table1
. A query like select count(1) from table1
can return values 1567, or 789, in addition to 1000.
In the case of a count above 1000 (e.g. 1567), you can find duplicated rows with different timestamps, such as
| A | currenttime |
| - | ------------ |
| a | 1:00:00 |
| a | 1:00:01 |
| b | 1:00:00 |
| b | 1:00:03 |
...
In the case of 789, many rows are simply missing…
We suspect this is related to the process of updating the index for the upserted table. Has anyone seen this before?Anusha
11/30/2021, 3:01 AMyelim yu
11/30/2021, 5:08 AMeywek
11/30/2021, 9:24 AMLIKE
operator to JSON_MATCH
? I’m currently using
REGEXP_LIKE(JSONEXTRACTSCALAR("labels", '$.demande_intention', 'STRING'), 'terminal')
but it’s very slow, even with a small number of scanned documents (21). Maybe having it directly with JSON_MATCH
could speed up this operation?
JSON_MATCH("labels", 'demande_intention LIKE ''terminal''')
Thank youAnish Nair
11/30/2021, 3:20 PMMahesh babu
12/01/2021, 7:08 AMSyed Akram
12/01/2021, 11:26 AMMap
12/01/2021, 10:25 PMRealtimeProvisioningHelper -tableConfigFile <tableConfig> -numPartitions 1 -pushFrequency null -numHosts 1,2,3,4 -numHours 1,2,3,4,56,12,18,24 -sampleCompletedSegmentDir <path-to-segment> -ingestionRate 1000 -maxUsableHostMemory 5120G -retentionHours 1
Note:
* Table retention and push frequency ignored for determining retentionHours since it is specified in command
* See <https://docs.pinot.apache.org/operators/operating-pinot/tuning/realtime>
Memory used per host (Active/Mapped)
numHosts --> 1 |2 |3 |4 |
numHours
1 --------> 8.1G/295.67G |4.05G/147.83G |4.05G/147.83G |4.05G/147.83G |
2 --------> NA |NA |NA |NA |
3 --------> NA |NA |NA |NA |
4 --------> NA |NA |NA |NA |
12 --------> NA |NA |NA |NA |
18 --------> NA |NA |NA |NA |
24 --------> NA |NA |NA |NA |
56 --------> NA |NA |NA |NA |
Optimal segment size
numHosts --> 1 |2 |3 |4 |
numHours
1 --------> 1.51G |1.51G |1.51G |1.51G |
2 --------> NA |NA |NA |NA |
3 --------> NA |NA |NA |NA |
4 --------> NA |NA |NA |NA |
12 --------> NA |NA |NA |NA |
18 --------> NA |NA |NA |NA |
24 --------> NA |NA |NA |NA |
56 --------> NA |NA |NA |NA |
Consuming memory
numHosts --> 1 |2 |3 |4 |
numHours
1 --------> 8.1G |4.05G |4.05G |4.05G |
2 --------> NA |NA |NA |NA |
3 --------> NA |NA |NA |NA |
4 --------> NA |NA |NA |NA |
12 --------> NA |NA |NA |NA |
18 --------> NA |NA |NA |NA |
24 --------> NA |NA |NA |NA |
56 --------> NA |NA |NA |NA |
Total number of segments queried per host (for all partitions)
numHosts --> 1 |2 |3 |4 |
numHours
1 --------> 2 |1 |1 |1 |
2 --------> NA |NA |NA |NA |
3 --------> NA |NA |NA |NA |
4 --------> NA |NA |NA |NA |
12 --------> NA |NA |NA |NA |
18 --------> NA |NA |NA |NA |
24 --------> NA |NA |NA |NA |
56 --------> NA |NA |NA |NA |
Map
12/02/2021, 3:20 AMSyed Akram
12/02/2021, 7:01 AMYeongju Kang
12/02/2021, 7:35 AM{
"schemaName": "transcript",
"dimensionFieldSpecs": [
{
"name": "studentID",
"dataType": "INT"
},
{
"name": "firstName",
"dataType": "STRING"
},
{
"name": "lastName",
"dataType": "STRING"
},
{
"name": "gender",
"dataType": "STRING"
},
{
"name": "subject",
"dataType": "STRING"
},
{
"name": "doNotFailPlease",
"dataType": "STRING",
"defaultNullValue": ""
},
{
"name": "ts2",
"dataType": "TIMESTAMP"
}
],
"metricFieldSpecs": [
{
"name": "score",
"dataType": "FLOAT"
}
],
"dateTimeFieldSpecs": [
{
"name": "ts",
"dataType": "TIMESTAMP",
"format": "1:SECONDS:EPOCH",
"granularity": "1:SECONDS"
}
],
"primaryKeyColumns": [
"studentID"
]
}
2. hybrid_offline.json
{
"tableName": "transcript_hybrid",
"tableType": "OFFLINE",
"segmentsConfig": {
"replication": 1,
"timeColumnName": "ts",
"timeType": "SECONDS"
},
"tenants": {
"broker": "DefaultTenant",
"server": "DefaultTenant"
},
"tableIndexConfig": {
"loadMode": "MMAP"
},
"metadata": {}
}
3. hybrid_realtime.json
{
"tableName": "transcript_hybrid",
"tableType": "REALTIME",
"segmentsConfig": {
"timeColumnName": "ts",
"timeType": "SECONDS",
"schemaName": "transcript",
"replicasPerPartition": "1"
},
"tenants": {},
"tableIndexConfig": {
"loadMode": "MMAP",
"streamConfigs": {
"streamType": "kafka",
"stream.kafka.consumer.type": "lowlevel",
"stream.kafka.topic.name": "transcript",
"stream.kafka.decoder.class.name": "org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
"stream.kafka.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
"stream.kafka.broker.list": "kafka.local-pinot.svc.cluster.local:9092",
"realtime.segment.flush.threshold.time": "6h"
}
},
"metadata": {
"customConfigs": {}
},
"routing": {
"instanceSelectorType": "strictReplicaGroup"
},
"upsertConfig": {
"mode": "FULL"
}
}
Deepak Mishra
12/02/2021, 9:46 AMElon
12/02/2021, 4:41 PM