Harish Bohara
04/15/2022, 8:07 AMKartik Khare
04/15/2022, 10:47 AMHarish Bohara
04/15/2022, 12:11 PM
{
"schemaName": "aggregate_v1",
"dimensionFieldSpecs": [
{
"name": "ext_event_type",
"dataType": "STRING"
},
{
"name": "dim__channel",
"dataType": "STRING"
},
{
"name": "dim__pipeline",
"dataType": "STRING"
},
{
"name": "dim__internal",
"dataType": "STRING"
},
{
"name": "provider",
"dataType": "STRING"
},
{
"name": "status",
"dataType": "STRING"
},
{
"name": "year",
"dataType": "INT"
},
{
"name": "month",
"dataType": "INT"
},
{
"name": "day",
"dataType": "INT"
},
{
"name": "hour",
"dataType": "INT"
}
],
"dateTimeFieldSpecs": [
{
"name": "eventTime",
"dataType": "TIMESTAMP",
"format": "1:MILLISECONDS:EPOCH",
"granularity": "1:MILLISECONDS"
}
]
}
{
"tableName": "aggregate_v1",
"tableType": "REALTIME",
"segmentsConfig": {
"schemaName": "aggregate_v1",
"replication": "2",
"timeColumnName": "eventTime",
"allowNullTimeValue": false,
"replicasPerPartition": "2",
"retentionTimeUnit": "DAYS",
"retentionTimeValue": "30"
},
"tenants": {
"broker": "DefaultTenant",
"server": "DefaultTenant",
"tagOverrideConfig": {}
},
"tableIndexConfig": {
"noDictionaryColumns": [],
"invertedIndexColumns": [
],
"streamConfigs": {
"streamType": "kafka",
"stream.kafka.topic.name": "MY TOPIC",
"stream.kafka.broker.list": "MY BROKER",
"stream.kafka.consumer.type": "lowlevel",
"stream.kafka.consumer.prop.auto.offset.reset": "smallest",
"stream.kafka.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
"stream.kafka.decoder.class.name": "org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
"realtime.segment.flush.threshold.rows": "0",
"realtime.segment.flush.threshold.time": "24h",
"realtime.segment.flush.segment.size": "100M"
},
"rangeIndexColumns": [],
"rangeIndexVersion": 1,
"autoGeneratedInvertedIndex": false,
"createInvertedIndexDuringSegmentGeneration": false,
"sortedColumn": [],
"bloomFilterColumns": [],
"loadMode": "MMAP",
"onHeapDictionaryColumns": [],
"varLengthDictionaryColumns": [],
"enableDefaultStarTree": false,
"enableDynamicStarTreeCreation": false,
"aggregateMetrics": false,
"nullHandlingEnabled": true,
"starTreeIndexConfigs": [
{
"dimensionsSplitOrder": [
"ext_event_type",
"dim__channel",
"status"
],
"skipStarNodeCreationForDimensions": [],
"functionColumnPairs": [
"COUNT__*"
],
"maxLeafRecords": 1
}
]
},
"metadata": {},
"quota": {},
"routing": {},
"query": {},
"ingestionConfig": {
"transformConfigs": [
{
"columnName": "year",
"transformFunction": "year(eventTime, 'Asia/Kolkata')"
},
{
"columnName": "month",
"transformFunction": "month(eventTime, 'Asia/Kolkata')"
},
{
"columnName": "day",
"transformFunction": "day(eventTime, 'Asia/Kolkata')"
},
{
"columnName": "hour",
"transformFunction": "hour(eventTime, 'Asia/Kolkata')"
}
]
},
"isDimTable": false,
"upsertConfig": {}
}
Kartik Khare
04/15/2022, 12:20 PMNavina
04/15/2022, 9:21 PMsmallest
.
I am guessing that when you restart, you don't have any completed segments that have been flushed to disk.
So, before restarting, can you check whether the table has completed segments?
Neha Pawar
Harish Bohara
04/16/2022, 5:15 AMHarish Bohara
04/16/2022, 5:17 AMHarish Bohara
04/16/2022, 5:20 AMNavina
04/16/2022, 5:31 AM reading from offset = “last offset which is flushed to disk” + 1. Is this understanding correct?
@User: if there is a segment that was successfully flushed to disk, it should not continue reading from smallest. Do you see completed segments on the server? You can also check the ZooKeeper state under
/<clusterName>/PROPERTYSTORE/SEGMENTS/<tableName>_REALTIME/
in the controller UI.
Harish Bohara
04/16/2022, 6:12 AMHarish Bohara
04/16/2022, 6:12 AMHarish Bohara
04/16/2022, 6:14 AMHarish Bohara
04/16/2022, 11:34 AMMayank
Mayank
Harish Bohara
04/17/2022, 8:02 AM-Xms512M -Xmx4G -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -Xlog:gc*:file=/opt/pinot/gc-pinot-server.log"
Harish Bohara
04/17/2022, 8:03 AMMayank