Hey there, I'm trying to run the config recommenda...
# general
d
Hey there, I'm trying to run the config recommendation engine, and I don't understand how I can specify the number of Kafka partitions that we already have. There is also no example of the parameter name in the docs. I tried
Copy code
"partitionRuleParams": {
    "KAFKA_NUM_MESSAGES_PER_SEC_PER_PARTITION": 0.7,
    "KAFKA_NUM_PARTITIONS": 128
  },
But
KAFKA_NUM_PARTITIONS
is not recognized. How can I tell the engine the current number of Kafka partitions we have?
Also, even when I remove this property, I don't get the
realtimeProvisioningRecommendations
. And
2147483647
kafka partitions seem to be a little bit too much 😆 Here's the body I'm sending:
Copy code
{
  "schema": {
    "dimensionFieldSpecs": [
      {
        "averageLength": 36,
        "cardinality": 900000000,
        "name": "responseId",
        "dataType": "STRING"
      },
      {
        "averageLength": 36,
        "cardinality": 300000,
        "name": "formId",
        "dataType": "STRING"
      },
      {
        "averageLength": 36,
        "cardinality": 50000,
        "name": "channelId",
        "dataType": "STRING"
      },
      {
        "averageLength": 25,
        "cardinality": 5,
        "name": "channelPlatform",
        "dataType": "STRING"
      },
      {
        "averageLength": 36,
        "cardinality": 7000,
        "name": "companyId",
        "dataType": "STRING"
      },
      {
        "cardinality": 2,
        "name": "submitted",
        "dataType": "BOOLEAN"
      },
      {
        "cardinality": 2,
        "name": "deleted",
        "dataType": "BOOLEAN"
      }
    ],
    "schemaName": "responseCount"
  },
  "queriesWithWeights": {
    "select DATETIMECONVERT(createdAt, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd''T''HH:mm:ss.SSSZ', '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy', '1:DAYS') as timeWindow, channelPlatform, deleted, submitted, count(*) as total from responseCount where companyId = '<redacted>' group by timeWindow, channelPlatform, deleted, submitted order by timeWindow, channelPlatform limit 10000000": 1
  },
  "tableType": "REALTIME",
  "qps": 5,
  "latencySLA": 2000,
  "partitionRuleParams": {
    "KAFKA_NUM_MESSAGES_PER_SEC_PER_PARTITION": 0.7
  },
  "rulesToExecute": {
    "recommendRealtimeProvisioning": true
  }
}
The response I get:
Copy code
{
  "realtimeProvisioningRecommendations": {},
  "segmentSizeRecommendations": {
    "message": "Segment sizing for realtime-only tables is done via Realtime Provisioning Rule",
    "numRowsPerSegment": 0,
    "numSegments": 0,
    "segmentSize": 0
  },
  "indexConfig": {
    "sortedColumnOverwritten": true,
    "invertedIndexColumns": [],
    "noDictionaryColumns": [
      "responseId"
    ],
    "rangeIndexColumns": [],
    "sortedColumn": "companyId",
    "bloomFilterColumns": [
      "companyId"
    ],
    "onHeapDictionaryColumns": [],
    "varLengthDictionaryColumns": [
      "formId",
      "companyId",
      "channelPlatform",
      "channelId"
    ]
  },
  "partitionConfig": {
    "numKafkaPartitions": 2147483647,
    "numPartitionsRealtime": 1,
    "partitionDimension": "",
    "numPartitionsOffline": 1,
    "numPartitionsOfflineOverwritten": false,
    "numPartitionsRealtimeOverwritten": false,
    "partitionDimensionOverwritten": false
  },
  "flaggedQueries": {
    "flaggedQueries": {
      "select DATETIMECONVERT(createdAt, '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd''T''HH:mm:ss.SSSZ', '1:MILLISECONDS:SIMPLE_DATE_FORMAT:yyyy', '1:DAYS') as timeWindow, channelPlatform, deleted, submitted, count(*) as total from responseCount where companyId = '<redacted>' group by timeWindow, channelPlatform, deleted, submitted order by timeWindow, channelPlatform limit 10000000": "Warning: Please verify if you need to pull out huge number of records for this query. Consider using smaller limit than 100000"
    }
  },
  "aggregateMetrics": false
}