Hi Team, This is regarding Pinot servers getting ...
# troubleshooting
a
Hi Team, This is regarding Pinot servers getting OOM. We have a server with the following spec: Server mem: 64 GB; Xmx allocated to the Pinot server: 55 GB. • JVM memory used by this Pinot server sometimes goes up to 49 GB (screenshot for reference; server name: max-pinot1.srv.media.net), and at the same time Pinot usage goes up to 60 GB (screenshot for reference). • We have a real-time table with upsert enabled. This server has 90 million primary keys (LONG type). • Is this expected? Is there any way to get a breakdown of how each Pinot component is using the memory?
k
Hi, The upsert state is currently stored in memory, which might be causing this issue. Each server holds the upsert state for a single input stream partition. The best way would be to split your stream. That being said, 90 million keys aren't a lot if they all contain only a single long value. Can you share your table config as well?
v
Hi Ashish, we had the exact same use case and faced the exact same issue. Check whether your double columns are getting added to the dictionary by default.
To remove them, add a noDictionaryColumns entry to your table configuration and list the double columns there. We figured this out by taking a heap dump and analyzing it. It looked like we had a number of double columns which were getting added to the dictionary and used a huge amount of space.
a
You mean dimension with double type?
v
Also, please share the table config so we can suggest better solutions.
h
I am not an expert on Pinot - I had some learnings from Pinot upsert tables. I also have a use case for upsert with the PK as UUID + 2 more columns. I had 32 GB nodes (4 nodes). I gave 12 GB to the JVM and the rest was used by the Pinot server. I had gone up to 200M+ records without any issue (I used to get OOM at > 400M records, because my nodes did not have enough memory). If you have all this data available in the Kafka topic (for me, I had Kafka retention of 8 days, which held > 500M records, and it allowed me to test different table configurations), you can create a new table with 16 GB for the JVM and the rest left free for the Pinot server.
a
Copy code
{
  "REALTIME": {
    "tableName": "max_reporting__REALTIME",
    "tableType": "REALTIME",
    "segmentsConfig": {
      "timeType": "HOURS",
      "schemaName": "reporting",
      "retentionTimeUnit": "DAYS",
      "retentionTimeValue": "25",
      "replicasPerPartition": "2",
      "timeColumnName": "statsdatehour_epoch",
      "completionConfig": {
        "completionMode": "DOWNLOAD"
      }
    },
    "tenants": {
      "broker": "DefaultTenant",
      "server": "DefaultTenant"
    },
    "tableIndexConfig": {
      "invertedIndexColumns": [
        "entity_id",
        "entity_2",
        "entity_3"
      ],
      "streamConfigs": {
        "streamType": "kafka",
        "stream.kafka.consumer.type": "lowlevel",
        "stream.kafka.topic.name": "c8.max_reporting_olap",
        "stream.kafka.decoder.class.name": "org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
        "stream.kafka.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
        "stream.kafka.broker.list": "c8-logging-kafka-5-prod:9092,c8-logging-kafka-6-prod:9092,c8-logging-kafka-7-prod:9092,c8-logging-kafka-8-prod:9092,c8-logging-kafka-9-prod:9092,c8-logging-kafka-10-prod:9092,c8-logging-kafka-11-prod:9092",
        "realtime.segment.flush.threshold.rows": "0",
        "realtime.segment.flush.threshold.size": "0",
        "realtime.segment.flush.threshold.time": "24h",
        "realtime.segment.flush.threshold.segment.size": "500M",
        "realtime.segment.flush.autotune.initialRows": "10000000",
        "stream.kafka.consumer.prop.auto.offset.reset": "smallest"
      },
      "loadMode": "MMAP",
      "enableDefaultStarTree": false,
      "enableDynamicStarTreeCreation": false,
      "aggregateMetrics": false,
      "nullHandlingEnabled": false,
      "rangeIndexVersion": 1,
      "autoGeneratedInvertedIndex": false,
      "createInvertedIndexDuringSegmentGeneration": false
    },
    "metadata": {
      "customConfigs": {}
    },
    "routing": {
      "instanceSelectorType": "strictReplicaGroup"
    },
    "upsertConfig": {
      "mode": "FULL",
      "comparisonColumn": "process_datetime",
      "hashFunction": "NONE"
    },
    "ingestionConfig": {
      "transformConfigs": [
        {
          "columnName": "statsdatehour_epoch",
          "transformFunction": "FromDateTime(\"stats_date_hour\", 'YYYYMMddHH')"
        },
        {
          "columnName": "stats_datetime",
          "transformFunction": "FromDateTime(concat(\"stats_date_hour\",\"minute_start_id\",''), 'YYYYMMddHHmm')"
        }
      ]
    },
    "isDimTable": false
  }
}
@Vibhor Jaiswal @Kartik Khare
k
Can you also share the schema and the pinot version you are using?
a
Copy code
{
  "schemaName": "reporting",
  "dimensionFieldSpecs": [
    {
      "name": "unique_id",
      "dataType": "LONG"
    },
    {
      "name": "hour_id",
      "dataType": "INT"
    },
    {
      "name": "minute_start_id",
      "dataType": "INT"
    },
    {
      "name": "entity_id_1",
      "dataType": "INT"
    },
    {
      "name": "entity_id_2",
      "dataType": "INT"
    },
    {
      "name": "served_category_id",
      "dataType": "INT"
    },
    {
      "name": "billing_entity_account_id",
      "dataType": "INT"
    },
    {
      "name": "entity_id_3",
      "dataType": "INT"
    },
    {
      "name": "entity_id_4",
      "dataType": "INT"
    },
    {
      "name": "entity_id_5",
      "dataType": "INT"
    },
    {
      "name": "entity_id_6",
      "dataType": "INT"
    },
    {
      "name": "entity_id_7",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_8",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_9",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_10",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_11",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_12",
      "dataType": "STRING"
    },
    {
      "name": "country_id",
      "dataType": "INT"
    },
    {
      "name": "state_id",
      "dataType": "INT"
    },
    {
      "name": "city_id",
      "dataType": "INT"
    },
    {
      "name": "postal_code",
      "dataType": "STRING"
    },
    {
      "name": "dma_code",
      "dataType": "INT"
    },
    {
      "name": "device_type_id",
      "dataType": "INT"
    },
    {
      "name": "os",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_13",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_14",
      "dataType": "STRING"
    },
    {
      "name": "Channel",
      "dataType": "STRING"
    },
    {
      "name": "reporting_group",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_15",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_16",
      "dataType": "INT"
    },
    {
      "name": "non_exclusive_filter",
      "dataType": "STRING"
    },
    {
      "name": "browser_id",
      "dataType": "INT"
    },
    {
      "name": "event_status",
      "dataType": "INT"
    },
    {
      "name": "entity_id_17",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_18",
      "dataType": "INT"
    },
    {
      "name": "traffic_source_type",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_19",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_49",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_48",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_47",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_46",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_45",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_41",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_42",
      "dataType": "INT"
    },
    {
      "name": "entity_id_43",
      "dataType": "INT"
    },
    {
      "name": "entity_id_44",
      "dataType": "INT"
    },
    {
      "name": "email_domain_id",
      "dataType": "INT"
    },
    {
      "name": "entity_id_20",
      "dataType": "INT"
    },
    {
      "name": "entity_id_21",
      "dataType": "INT"
    },
    {
      "name": "entity_id_22",
      "dataType": "INT"
    },
    {
      "name": "entity_id_23",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_24",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_40",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_39",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_38",
      "dataType": "INT"
    },
    {
      "name": "entity_id_37",
      "dataType": "INT"
    },
    {
      "name": "entity_id_36",
      "dataType": "BOOLEAN"
    },
    {
      "name": "integration_type_id",
      "dataType": "INT"
    },
    {
      "name": "entity_id_35",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_34",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_33",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_32",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_31",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_30",
      "dataType": "INT"
    },
    {
      "name": "entity_id_29",
      "dataType": "INT"
    },
    {
      "name": "entity_id_27",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_28",
      "dataType": "STRING"
    },
    {
      "name": "entity_id_26",
      "dataType": "STRING",
      "singleValueField": false
    },
    {
      "name": "bidding_strategy_id",
      "dataType": "INT"
    },
    {
      "name": "entity_id_25",
      "dataType": "STRING"
    }
  ],
  "metricFieldSpecs": [
    {
      "name": "measure_1",
      "dataType": "INT"
    },
    {
      "name": "measure_2",
      "dataType": "INT"
    },
    {
      "name": "measure_3",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_4",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_5",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_6",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_7",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_8",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_9",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_10",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_11",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_12",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_13",
      "dataType": "INT"
    },
    {
      "name": "measure_14",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_15",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_16",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_17",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_18",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_19",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_20",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_21",
      "dataType": "DOUBLE"
    },
    {
      "name": "measure_22",
      "dataType": "INT"
    },
    {
      "name": "measure_23",
      "dataType": "INT"
    },
    {
      "name": "measure_24",
      "dataType": "DOUBLE"
    }
  ],
  "dateTimeFieldSpecs": [
    {
      "name": "stats_date_hour",
      "dataType": "LONG",
      "format": "1:HOURS:SIMPLE_DATE_FORMAT:yyyyMMddHH",
      "granularity": "1:HOURS"
    },
    {
      "name": "process_datetime",
      "dataType": "STRING",
      "format": "1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss.SSS",
      "granularity": "1:SECONDS"
    },
    {
      "name": "statsdatehour_epoch",
      "dataType": "TIMESTAMP",
      "format": "1:MILLISECONDS:EPOCH",
      "granularity": "1:HOURS"
    },
    {
      "name": "stats_datetime",
      "dataType": "TIMESTAMP",
      "format": "1:MILLISECONDS:EPOCH",
      "granularity": "1:MINUTES"
    }
  ],
  "primaryKeyColumns": [
    "unique_id"
  ]
}
We are using pinot 0.10
c
@Anish Nair did you solve this problem? We too have the same problem.
a
It's been some time, but I guess we simply increased the memory. @coco