Rakesh Bobbala
02/10/2023, 7:54 PM
{
"tableName": "backend_pre_processed",
"tableType": "REALTIME",
"segmentsConfig": {
"schemaName": "backend_pre_processed",
"retentionTimeUnit": "DAYS",
"retentionTimeValue": "90",
"replication": "1",
"replicasPerPartition": "1",
"timeColumnName": "spark_write_timestamp",
"minimizeDataMovement": false
},
"tenants": {
"broker": "DefaultTenant",
"server": "DefaultTenant",
"tagOverrideConfig": {}
},
"tableIndexConfig": {
"invertedIndexColumns": [],
"noDictionaryColumns": [],
"autoGeneratedInvertedIndex": false,
"createInvertedIndexDuringSegmentGeneration": false,
"sortedColumn": [],
"bloomFilterColumns": [],
"loadMode": "MMAP",
"streamConfigs": {
"streamType": "kinesis",
"stream.kinesis.topic.name": "backend-processed-events",
"stream.kinesis.consumer.type": "lowlevel",
"stream.kinesis.decoder.class.name": "org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
"stream.kinesis.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kinesis.KinesisConsumerFactory",
"realtime.segment.flush.threshold.time": "7d",
"realtime.segment.flush.threshold.rows": "1000000",
"region": "us-east-1",
"maxRecordsToFetch": "3",
"shardIteratorType": "LATEST",
"stream.kinesis.fetch.timeout.millis": "30000"
},
"onHeapDictionaryColumns": [],
"varLengthDictionaryColumns": [],
"enableDefaultStarTree": false,
"enableDynamicStarTreeCreation": false,
"aggregateMetrics": false,
"nullHandlingEnabled": false,
"optimizeDictionaryForMetrics": false,
"noDictionarySizeRatioThreshold": 0,
"rangeIndexColumns": [],
"rangeIndexVersion": 2
},
"metadata": {},
"quota": {},
"routing": {},
"query": {},
"ingestionConfig": {
"segmentTimeValueCheck": true,
"transformConfigs": [
{
"columnName": "spark_write_timestamp_epoch",
"transformFunction": "FromDateTime(spark_write_time, 'yyyy-MM-dd''T''HH:mm:ss.SSS''Z')"
},
{
"columnName": "spark_write_timestamp",
"transformFunction": "TRIM(spark_write_time)"
},
{
"columnName": "row_created_time",
"transformFunction": "now()"
},
{
"columnName": "m_event",
"transformFunction": "TRIM(event)"
}
],
"continueOnError": false,
"rowTimeValueCheck": false
},
"isDimTable": false
}
Also, I want to know if this is the right way to test the ingestion time.
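One way to sanity-check this, assuming row_created_time and spark_write_timestamp_epoch are both declared as LONG epoch-millis columns in the schema: since now() stamps each row at ingestion and FromDateTime() recovers the producer-side write time, the difference between the two approximates per-row ingestion lag. A sketch of a Pinot query along those lines (the ago('PT1H') window and the result-column aliases are illustrative, not from the thread):

-- Approximate ingestion lag in milliseconds over the last hour.
-- row_created_time is set by now() at ingestion time;
-- spark_write_timestamp_epoch is parsed by FromDateTime in the transformConfigs above.
SELECT COUNT(*) AS rows_seen,
       AVG(row_created_time - spark_write_timestamp_epoch) AS avg_lag_ms,
       MAX(row_created_time - spark_write_timestamp_epoch) AS max_lag_ms
FROM backend_pre_processed
WHERE row_created_time > ago('PT1H')

Note that lag measured this way includes Kinesis transit plus decode time, because now() is evaluated when Pinot consumes the record, not when the segment is committed.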