Lars-Kristian Svenøy
04/25/2022, 11:00 AMLars-Kristian Svenøy
04/25/2022, 11:02 AM{
"objectId": "00000000-0000-0000-0000-000000000000",
"jsonObject": {
"values": [
{
"id": "bob",
"names": [
"a",
"b",
"c",
"d",
"e"
]
}
]
}
}
Lars-Kristian Svenøy
04/25/2022, 11:03 AM{
"schemaName": "myObjects",
"dimensionFieldSpecs": [
{
"name": "objectId",
"dataType": "STRING"
},
{
"name": "jsonObject",
"dataType": "JSON"
}
],
"dateTimeFieldSpecs": [
{
"name": "lastModified",
"dataType": "LONG",
"format": "1:MILLISECONDS:EPOCH",
"granularity": "1:DAYS"
}
]
}
Lars-Kristian Svenøy
04/25/2022, 11:03 AMLars-Kristian Svenøy
04/25/2022, 11:03 AMsaurabh dubey
04/25/2022, 11:24 AMLars-Kristian Svenøy
04/25/2022, 11:25 AMLars-Kristian Svenøy
04/25/2022, 11:25 AMLars-Kristian Svenøy
04/25/2022, 11:25 AMsaurabh dubey
04/25/2022, 11:49 AM{
"tableName": "myObject",
"tableType": "REALTIME",
"segmentsConfig": {
"timeColumnName": "lastModified",
"timeType": "MILLISECONDS",
"schemaName": "myObjects",
"replicasPerPartition": "1"
},
"tenants": {},
"tableIndexConfig": {
"loadMode": "MMAP",
"streamConfigs": {
"streamType": "kafka",
"stream.kafka.consumer.type": "lowlevel",
"stream.kafka.topic.name": "object-topic",
"stream.kafka.decoder.class.name": "org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
"stream.kafka.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
"stream.kafka.broker.list": "localhost:9876",
"realtime.segment.flush.threshold.time": "5000",
"realtime.segment.flush.threshold.rows": "1",
"stream.kafka.consumer.prop.auto.offset.reset": "smallest"
}
},
"metadata": {
"customConfigs": {}
}
}
I was able to successfully ingest the dataLars-Kristian Svenøy
04/25/2022, 11:52 AMLars-Kristian Svenøy
04/25/2022, 11:58 AM{
"REALTIME": {
"tableName": "myObjects",
"tableType": "REALTIME",
"segmentsConfig": {
"timeType": "MILLISECONDS",
"schemaName": "myObjects",
"retentionTimeUnit": "DAYS",
"retentionTimeValue": "365",
"timeColumnName": "lastModified",
"allowNullTimeValue": false,
"replicasPerPartition": "2"
},
"tenants": {
"broker": "DefaultTenant",
"server": "DefaultTenant"
},
"tableIndexConfig": {
"rangeIndexVersion": 2,
"jsonIndexColumns": [
"jsonObject"
],
"autoGeneratedInvertedIndex": false,
"createInvertedIndexDuringSegmentGeneration": false,
"loadMode": "MMAP",
"noDictionaryColumns": [
"lastModified",
"jsonObject"
],
"enableDefaultStarTree": false,
"enableDynamicStarTreeCreation": false,
"segmentPartitionConfig": {
"columnPartitionMap": {
"objectId": {
"functionName": "Murmur",
"numPartitions": 2
}
}
},
"aggregateMetrics": false,
"nullHandlingEnabled": false
},
"metadata": {
"customConfigs": {}
},
"routing": {
"segmentPrunerTypes": [
"partition"
],
"instanceSelectorType": "replicaGroup"
},
"instanceAssignmentConfigMap": {
"CONSUMING": {
"tagPoolConfig": {
"tag": "DefaultTenant",
"poolBased": false,
"numPools": 0
},
"replicaGroupPartitionConfig": {
"replicaGroupBased": true,
"numInstances": 0,
"numReplicaGroups": 2,
"numInstancesPerReplicaGroup": 8,
"numPartitions": 0,
"numInstancesPerPartition": 0
}
}
},
"upsertConfig": {
"mode": "NONE",
"hashFunction": "NONE"
},
"ingestionConfig": {
"streamIngestionConfig": {
"streamConfigMaps": [
{
"streamType": "kafka",
"stream.kafka.consumer.type": "lowlevel",
"stream.kafka.topic.name": "my_objects",
"stream.kafka.decoder.class.name": "org.apache.pinot.plugin.inputformat.json.JSONMessageDecoder",
"stream.kafka.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
"stream.kafka.broker.list": ".....",
"realtime.segment.flush.threshold.rows": "0",
"realtime.segment.flush.threshold.time": "24h",
"realtime.segment.flush.threshold.segment.size": "200M",
"realtime.segment.flush.autotune.initialRows": "2000000",
"stream.kafka.consumer.prop.auto.offset.reset": "smallest"
}
]
},
"transformConfigs": [],
"complexTypeConfig": {}
},
"isDimTable": false
}
}
Lars-Kristian Svenøy
04/26/2022, 10:50 AMsaurabh dubey
04/26/2022, 10:53 AMKartik Khare
04/26/2022, 10:57 AMLars-Kristian Svenøy
04/26/2022, 2:32 PMLars-Kristian Svenøy
04/26/2022, 2:33 PMLars-Kristian Svenøy
04/26/2022, 2:33 PMsaurabh dubey
04/26/2022, 2:49 PMLars-Kristian Svenøy
04/26/2022, 3:09 PMKartik Khare
04/26/2022, 3:10 PMLars-Kristian Svenøy
04/26/2022, 3:11 PM{
"schemaName": "myObjects",
"dimensionFieldSpecs": [
{
"name": "objectId",
"dataType": "STRING"
},
{
"name": "jsonObject",
"dataType": "JSON"
}
],
"dateTimeFieldSpecs": [
{
"name": "lastModified",
"dataType": "LONG",
"format": "1:MILLISECONDS:EPOCH",
"granularity": "1:DAYS"
}
]
}
{
"REALTIME": {
"tableName": "myObjects",
"tableType": "REALTIME",
"segmentsConfig": {
"timeType": "MILLISECONDS",
"schemaName": "myObjects",
"retentionTimeUnit": "DAYS",
"retentionTimeValue": "365",
"timeColumnName": "lastModified",
"allowNullTimeValue": false,
"replicasPerPartition": "2"
},
"tenants": {
"broker": "DefaultTenant",
"server": "DefaultTenant"
},
"tableIndexConfig": {
"rangeIndexVersion": 2,
"jsonIndexColumns": [
"jsonObject"
],
"autoGeneratedInvertedIndex": false,
"createInvertedIndexDuringSegmentGeneration": false,
"loadMode": "MMAP",
"noDictionaryColumns": [
"lastModified",
"jsonObject"
],
"enableDefaultStarTree": false,
"enableDynamicStarTreeCreation": false,
"segmentPartitionConfig": {
"columnPartitionMap": {
"objectId": {
"functionName": "Murmur",
"numPartitions": 2
}
}
},
"aggregateMetrics": false,
"nullHandlingEnabled": false
},
"metadata": {
"customConfigs": {}
},
"routing": {
"segmentPrunerTypes": [
"partition"
],
"instanceSelectorType": "replicaGroup"
},
"instanceAssignmentConfigMap": {
"CONSUMING": {
"tagPoolConfig": {
"tag": "DefaultTenant",
"poolBased": false,
"numPools": 0
},
"replicaGroupPartitionConfig": {
"replicaGroupBased": true,
"numInstances": 0,
"numReplicaGroups": 2,
"numInstancesPerReplicaGroup": 8,
"numPartitions": 0,
"numInstancesPerPartition": 0
}
}
},
"upsertConfig": {
"mode": "NONE",
"hashFunction": "NONE"
},
"ingestionConfig": {
"streamIngestionConfig": {
"streamConfigMaps": [
{
"streamType": "kafka",
"stream.kafka.consumer.type": "lowlevel",
"stream.kafka.topic.name": "my_objects",
"stream.kafka.decoder.class.name": "org.apache.pinot.plugin.inputformat.json.JSONMessageDecoder",
"stream.kafka.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
"stream.kafka.broker.list": ".....",
"realtime.segment.flush.threshold.rows": "0",
"realtime.segment.flush.threshold.time": "24h",
"realtime.segment.flush.threshold.segment.size": "200M",
"realtime.segment.flush.autotune.initialRows": "2000000",
"stream.kafka.consumer.prop.auto.offset.reset": "smallest"
}
]
},
"transformConfigs": [],
"complexTypeConfig": {}
},
"isDimTable": false
}
}
Kartik Khare
04/26/2022, 3:12 PMLars-Kristian Svenøy
04/26/2022, 3:13 PMKartik Khare
04/26/2022, 3:14 PMLars-Kristian Svenøy
04/26/2022, 3:32 PMNeha Pawar
"complexTypeConfig": {}
. I was able to reproduce your issue (with the jsonObjectStr as STRING + jsonFormat). And on debugging, I saw that having "complexTypeConfig": {}
in the table config makes it take a different branch of the code, which causes the problem. Can you try without it?Neha Pawar
Lars-Kristian Svenøy
04/26/2022, 9:17 PMLars-Kristian Svenøy
04/26/2022, 9:17 PMNeha Pawar
Neha Pawar
Xiang Fu
Lars-Kristian Svenøy
04/27/2022, 8:26 AM