Tommaso Peresson
06/06/2022, 10:23 AM
{
"OFFLINE": {
"tableName": "DailyUniqHll_OFFLINE",
"tableType": "OFFLINE",
"segmentsConfig": {
"timeType": "DAYS",
"retentionTimeUnit": "DAYS",
"retentionTimeValue": "365",
"replication": "1",
"timeColumnName": "partition",
"allowNullTimeValue": false
},
"tenants": {
"broker": "DefaultTenant",
"server": "DefaultTenant"
},
"tableIndexConfig": {
"enableDefaultStarTree": false,
"starTreeIndexConfigs": [
{
"dimensionsSplitOrder": [
"partition",
"fields.1",
"fields.2",
"fields.3",
"fields.4",
"fields.5",
"fields.6",
"fields.7",
"fields.8",
"fields.9"
],
"functionColumnPairs": [
"SUM__counters.c",
"DISTINCTCOUNTHLL__hllState"
],
"maxLeafRecords": 1000
}
],
"enableDynamicStarTreeCreation": true,
"aggregateMetrics": false,
"nullHandlingEnabled": false,
"rangeIndexVersion": 2,
"autoGeneratedInvertedIndex": false,
"createInvertedIndexDuringSegmentGeneration": false
},
"metadata": {},
"ingestionConfig": {
"batchIngestionConfig": {
"segmentIngestionType": "APPEND",
"segmentIngestionFrequency": "DAILY"
},
"complexTypeConfig": {
"fieldsToUnnest": [
"fields",
"counters"
],
"delimiter": ".",
"collectionNotUnnestedToJson": "NON_PRIMITIVE"
}
},
"isDimTable": false
}
}
Schema:
{
"schemaName": "ViewElementDailyUniqHll",
"dimensionFieldSpecs": [
{
"name": "fields.1",
"dataType": "STRING"
},
{
"name": "fields.2",
"dataType": "STRING"
},
{
"name": "fields.3",
"dataType": "STRING"
},
{
"name": "fields.4",
"dataType": "STRING"
},
{
"name": "fields.5",
"dataType": "STRING"
},
{
"name": "fields.6",
"dataType": "STRING"
},
{
"name": "fields.7",
"dataType": "STRING"
},
{
"name": "fields.8",
"dataType": "STRING"
},
{
"name": "fields.9",
"dataType": "STRING"
},
{
"name": "cubeName",
"dataType": "STRING"
},
{
"name": "list",
"dataType": "LONG",
"singleValueField": false
},
{
"name": "hllState",
"dataType": "BYTES"
},
{
"name": "counters.c",
"dataType": "INT"
}
],
"dateTimeFieldSpecs": [
{
"name": "partition",
"dataType": "STRING",
"format": "1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd",
"granularity": "1:DAYS"
}
]
}
When I ingest some data, I get a ~10x size increase because of DISTINCTCOUNTHLL__hllState
in the star-tree index. Is this expected, or is something misconfigured?
Mark Needham
Mayank
Tommaso Peresson
06/06/2022, 1:22 PM
Do you mean 10x more than if that field isn't included in the index?
Yes
Tommaso Peresson
06/06/2022, 1:23 PM
Kishore G
Mayank
Mayank
Kishore G
Kishore G