Kevin Liu
06/08/2022, 8:10 AM
Abhijeet Kushe
07/18/2022, 6:29 PM
{
"tableName": "events",
"tableType": "REALTIME",
"segmentsConfig": {
"timeColumnName": "eventTimestamp",
"timeType": "MILLISECONDS",
"schemaName": "events",
"replicasPerPartition": "1",
"retentionTimeUnit": "DAYS",
"retentionTimeValue": "365",
"segmentPushType": "APPEND"
},
"tenants": {
"broker": "DefaultTenant",
"server": "DefaultTenant"
},
"tableIndexConfig": {
"loadMode": "MMAP",
"streamConfigs": {
"streamType": "kinesis",
"stream.kinesis.topic.name": "events-stream",
"region": "us-east-1",
"shardIteratorType": "LATEST",
"stream.kinesis.consumer.type": "lowlevel",
"stream.kinesis.fetch.timeout.millis": "30000",
"stream.kinesis.decoder.class.name": "org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
"stream.kinesis.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kinesis.KinesisConsumerFactory",
"realtime.segment.flush.threshold.size": "5000000",
"realtime.segment.flush.threshold.time": "1d"
}
},
"upsertConfig": {
"mode": "FULL"
},
"routing": {
"instanceSelectorType": "strictReplicaGroup"
},
"metadata": {
"customConfigs": {}
}
}
I created a new config with the following changes:
{
"tableName": "events",
"tableType": "REALTIME",
"segmentsConfig": {
"timeColumnName": "eventTimestamp",
"timeType": "MILLISECONDS",
"schemaName": "events",
"replicasPerPartition": "3",
"retentionTimeUnit": "DAYS",
"retentionTimeValue": "1826",
"segmentPushType": "APPEND",
"replicaGroupStrategyConfig": {
"partitionColumn": "accountId",
"numInstancesPerPartition": 1
}
},
"tenants": {
"broker": "DefaultTenant",
"server": "DefaultTenant"
},
"tableIndexConfig": {
"loadMode": "MMAP",
"streamConfigs": {
"streamType": "kinesis",
"stream.kinesis.topic.name": "events-stream",
"region": "us-east-1",
"shardIteratorType": "LATEST",
"stream.kinesis.consumer.type": "lowlevel",
"stream.kinesis.fetch.timeout.millis": "30000",
"stream.kinesis.decoder.class.name": "org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
"stream.kinesis.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kinesis.KinesisConsumerFactory",
"realtime.segment.flush.threshold.size": "5000000",
"realtime.segment.flush.threshold.time": "1d"
},
"segmentPartitionConfig": {
"columnPartitionMap": {
"accountId": {
"functionName": "Modulo",
"numPartitions": 4
}
}
}
},
"upsertConfig": {
"mode": "FULL"
},
"routing": {
"segmentPrunerTypes": [
"partition"
],
"instanceSelectorType": "strictReplicaGroup"
},
"metadata": {
"customConfigs": {}
}
}
I tried to rebalance the cluster using the endpoint below, passing the params shown in the image:
/tables/{tableName}/rebalance
abhinav wagle
07/19/2022, 11:03 PM
[…] validationTypesToSkip mean for the Pinot PUT /tables/{tableName} API:
Sukesh Boggavarapu
07/19/2022, 11:13 PM
Neeraja Sridharan
08/10/2022, 6:06 PM
[…] partition function & numPartitions. Based on this doc, I saw that Pinot currently supports the Modulo, Murmur, ByteArray and HashCode hash functions. Any guidance on when to choose which function, and also on your recommendation to use Murmur (not Modulo)?
Thread in Slack Conversation
Sukesh Boggavarapu
10/11/2022, 2:23 AM