Hi team, I get a query result that is very confusi...
# troubleshooting
h
Hi team, I get a query result that is very confusing. Can anyone help to reason it? query 1 uses a condition
"timestamp" between 1662946380000 and 1662946440000
and gets 985 items. query 2 uses a very close condition
add("timestamp", latency) between 1662946380000 and 1662946440000
and also gets 985 items. (
latency
is non negative) query 3 tries to combine the two conditions together
("timestamp" between 1662946380000 and 1662946440000 or add("timestamp", latency) between 1662946380000 and 1662946440000)
, but the result shows 18253149 items. I expect the result is between 985 and 985+985.
Copy code
1. select count("*") from api_requests where "timestamp" between 1662946380000 and 1662946440000 and entity_id='8a3688f4-6390-0e77-0163-d010adc10427'
count(*)
985

2. select count(*) from api_requests where add("timestamp", latency) between 1662946380000 and 1662946440000 and entity_id='8a3688f4-6390-0e77-0163-d010adc10427'
count(*)
985

3. select count("*") from api_requests where ("timestamp" between 1662946380000 and 1662946440000 or add("timestamp", latency) between 1662946380000 and 1662946440000) and entity_id='8a3688f4-6390-0e77-0163-d010adc10427'
count(*)
18253149
m
What version of Pinot are you using? Also can you share the table config and schema?
h
Hi Mayank, thank you for looking at the question. I’m using 0.11. The schema is
Copy code
{
  "schemaName": "api_requests",
  "dimensionFieldSpecs": [
    {
      "name": "event_id",
      "dataType": "STRING"
    },
    {
      "name": "entity_id",
      "dataType": "STRING"
    },
    {
      "name": "status",
      "dataType": "STRING"
    },
    {
      "name": "service_name",
      "dataType": "STRING"
    },
    {
      "name": "request_id",
      "dataType": "STRING"
    },
    {
      "name": "client_name",
      "dataType": "STRING"
    },
    {
      "name": "api",
      "dataType": "STRING"
    },
    {
      "name": "http_method",
      "dataType": "STRING"
    },
    {
      "name": "http_status",
      "dataType": "STRING"
    },
    {
      "name": "request_body",
      "dataType": "STRING",
      "maxLength": 10240
    },
    {
      "name": "response_body",
      "dataType": "STRING",
      "maxLength": 10240
    },
    {
      "name": "request_path",
      "dataType": "STRING"
    },
    {
      "name": "response_code",
      "dataType": "STRING"
    },
    {
      "name": "epoch_minute",
      "dataType": "LONG"
    },
    {
      "name": "hour",
      "dataType": "INT"
    },
    {
      "name": "day",
      "dataType": "INT"
    },
    {
      "name": "month",
      "dataType": "INT"
    },
    {
      "name": "year",
      "dataType": "INT"
    }
  ],
  "metricFieldSpecs": [
    {
      "name": "latency",
      "dataType": "LONG"
    }
  ],
  "dateTimeFieldSpecs": [
    {
      "name": "timestamp",
      "dataType": "LONG",
      "format": "1:MILLISECONDS:EPOCH",
      "granularity": "1:MILLISECONDS"
    }
  ]
}
and the table config
Copy code
{
  "tableName": "api_requests_REALTIME",
  "tableType": "REALTIME",
  "segmentsConfig": {
    "timeType": "MILLISECONDS",
    "schemaName": "api_requests",
    "retentionTimeUnit": "DAYS",
    "retentionTimeValue": "5",
    "replicasPerPartition": "1",
    "segmentPushType": "APPEND",
    "timeColumnName": "timestamp",
    "completionConfig": {
      "completionMode": "DOWNLOAD"
    },
    "minimizeDataMovement": false
  },
  "tenants": {
    "broker": "Telemetry",
    "server": "Telemetry"
  },
  "tableIndexConfig": {
    "invertedIndexColumns": [
      "request_id"
    ],
    "noDictionaryColumns": [
      "event_id",
      "timestamp",
      "response_body"
    ],
    "streamConfigs": {
      "streamType": "kafka",
      "stream.kafka.consumer.type": "lowlevel",
      "stream.kafka.topic.name": "TopicName",
      "stream.kafka.decoder.class.name": "org.apache.pinot.plugin.stream.kafka.KafkaJSONMessageDecoder",
      "stream.kafka.consumer.factory.class.name": "org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory",
      "stream.kafka.broker.list": "KafkaUrl",
      "stream.kafka.isolation.level": "read_committed",
      "stream.kafka.consumer.prop.auto.offset.reset": "smallest",
      "realtime.segment.flush.threshold.time": "60m",
      "realtime.segment.flush.threshold.rows": "0",
      "realtime.segment.flush.threshold.segment.size": "200M",
      "realtime.segment.flush.autotune.initialRows": "2000000",
      "sasl.mechanism": "PLAIN",
      "security.protocol": "SASL_SSL",
      "sasl.jaas.config": "org.apache.pinot.shaded.org.apache.kafka.common.security.scram.ScramLoginModule required username=\"usna\" password=\"ksec\";",
      "ssl.truststore.location": "/var/pinot/kafka.client.truststore.jks",
      "ssl.truststore.password": "kafka-client-secure",
      "ssl.endpoint.identification.algorithm": ""
    },
    "rangeIndexColumns": [
      "timestamp"
    ],
    "rangeIndexVersion": 2,
    "autoGeneratedInvertedIndex": false,
    "createInvertedIndexDuringSegmentGeneration": false,
    "sortedColumn": [
      "entity_id"
    ],
    "loadMode": "MMAP",
    "enableDefaultStarTree": false,
    "starTreeIndexConfigs": [
      {
        "dimensionsSplitOrder": [
          "entity_id",
          "epoch_minute",
          "api",
          "response_code",
          "http_status",
          "client_name",
          "http_method",
          "status",
          "service_name",
          "hour",
          "day",
          "month"
        ],
        "skipStarNodeCreationForDimensions": [],
        "functionColumnPairs": [
          "AVG__latency",
          "MAX__latency",
          "MIN__latency",
          "SUM__latency",
          "PERCENTILE_EST__latency",
          "COUNT__*"
        ],
        "maxLeafRecords": 5000000
      }
    ],
    "enableDynamicStarTreeCreation": true,
    "aggregateMetrics": true,
    "nullHandlingEnabled": true,
    "optimizeDictionaryForMetrics": false,
    "noDictionarySizeRatioThreshold": 0
  },
  "metadata": {
    "customConfigs": {}
  },
  "task": {
    "taskTypeConfigsMap": {
      "RealtimeToOfflineSegmentsTask": {
        "bucketTimePeriod": "20m",
        "bufferTimePeriod": "1h",
        "schedule": "0 0/10 * * * ?",
        "mergeType": "dedup",
        "maxNumRecordsPerSegment": "1500000"
      }
    }
  },
  "ingestionConfig": {
    "filterConfig": {
      "filterFunction": "Groovy({dbTable != \"api_requests\"}, dbTable)"
    },
    "transformConfigs": [
      {
        "columnName": "event_id",
        "transformFunction": "jsonPathString(metrics, '$.id')"
      },
      {
        "columnName": "entity_id",
        "transformFunction": "jsonPathString(metrics, '$.entity_id')"
      },
      {
        "columnName": "status",
        "transformFunction": "jsonPathString(metrics, '$.signal_body.status')"
      },
      {
        "columnName": "service_name",
        "transformFunction": "jsonPathString(metrics, '$.service')"
      },
      {
        "columnName": "request_id",
        "transformFunction": "jsonPathString(metrics, '$.signal_body.request_id')"
      },
      {
        "columnName": "client_name",
        "transformFunction": "Groovy({String trackId=metrics.signal_body.track_id; return (trackId==null || trackId.trim().isEmpty() || trackId.indexOf(\"-\")<0 ||  (trackId.trim().length()==36 && trackId.trim().indexOf(\"-\")==8 && trackId.trim().lastIndexOf(\"-\")==23 && trackId.trim().replaceAll(\"-\",\"\").length()==32)) ? null : trackId.split(\"-\")[0].trim()}, metrics)"
      },
      {
        "columnName": "api",
        "transformFunction": "jsonPathString(metrics, '$.signal_body.api_name')"
      },
      {
        "columnName": "http_method",
        "transformFunction": "jsonPathString(metrics, '$.signal_body.http_method')"
      },
      {
        "columnName": "http_status",
        "transformFunction": "jsonPathString(metrics, '$.signal_body.http_status')"
      },
      {
        "columnName": "request_body",
        "transformFunction": "jsonPathString(metrics, '$.signal_body.request_body')"
      },
      {
        "columnName": "response_body",
        "transformFunction": "jsonPathString(metrics, '$.signal_body.response_body')"
      },
      {
        "columnName": "request_path",
        "transformFunction": "jsonPathString(metrics, '$.signal_body.request_path')"
      },
      {
        "columnName": "response_code",
        "transformFunction": "jsonPathString(metrics, '$.signal_body.response_code')"
      },
      {
        "columnName": "latency",
        "transformFunction": "jsonPathLong(metrics, '$.signal_body.latency')"
      },
      {
        "columnName": "timestamp",
        "transformFunction": "jsonPathLong(metrics, '$.signal_body.timestamp')"
      },
      {
        "columnName": "epoch_minute",
        "transformFunction": "toEpochMinutes(jsonPathLong(metrics, '$.signal_body.timestamp'))"
      },
      {
        "columnName": "hour",
        "transformFunction": "hour(jsonPathLong(metrics, '$.signal_body.timestamp'))"
      },
      {
        "columnName": "day",
        "transformFunction": "day(jsonPathLong(metrics, '$.signal_body.timestamp'))"
      },
      {
        "columnName": "month",
        "transformFunction": "month(jsonPathLong(metrics, '$.signal_body.timestamp'))"
      },
      {
        "columnName": "year",
        "transformFunction": "year(jsonPathLong(metrics, '$.signal_body.timestamp'))"
      }
    ]
  },
  "isDimTable": false
}
Copy code
{
  "tableName": "api_requests_OFFLINE",
  "tableType": "OFFLINE",
  "segmentsConfig": {
    "timeType": "MILLISECONDS",
    "schemaName": "api_requests",
    "retentionTimeUnit": "DAYS",
    "retentionTimeValue": "90",
    "replication": "1",
    "segmentPushType": "APPEND",
    "segmentAssignmentStrategy": "BalanceNumSegmentAssignmentStrategy",
    "timeColumnName": "timestamp",
    "minimizeDataMovement": false,
    "segmentPushFrequency": "HOURLY"
  },
  "tenants": {
    "broker": "Telemetry",
    "server": "Telemetry"
  },
  "tableIndexConfig": {
    "invertedIndexColumns": [
      "request_id"
    ],
    "noDictionaryColumns": [
      "event_id",
      "timestamp",
      "response_body"
    ],
    "createInvertedIndexDuringSegmentGeneration": false,
    "enableDefaultStarTree": false,
    "starTreeIndexConfigs": [
      {
        "dimensionsSplitOrder": [
          "entity_id",
          "epoch_minute",
          "api",
          "response_code",
          "http_status",
          "client_name",
          "http_method",
          "status",
          "service_name",
          "hour",
          "day",
          "month"
        ],
        "skipStarNodeCreationForDimensions": [],
        "functionColumnPairs": [
          "AVG__latency",
          "MAX__latency",
          "MIN__latency",
          "SUM__latency",
          "PERCENTILE_EST__latency",
          "COUNT__*"
        ],
        "maxLeafRecords": 5000000
      }
    ],
    "enableDynamicStarTreeCreation": true,
    "aggregateMetrics": true,
    "nullHandlingEnabled": true,
    "optimizeDictionaryForMetrics": false,
    "noDictionarySizeRatioThreshold": 0,
    "rangeIndexColumns": [
      "timestamp"
    ],
    "rangeIndexVersion": 2,
    "autoGeneratedInvertedIndex": false
  },
  "metadata": {
    "customConfigs": {}
  },
  "isDimTable": false
}
m
Oh, are you using star-tree index? Num docs might change depending on whether star-tree aggregated docs are being read or raw records are being read. The fact that you have a udf in the filter makes me think star-tree is not being used in the third query.
If you run the first two queries with star-tree disabled then you will likely see the same record count.
One question @Huaqiang He, the numbers you returned (985) , are they the result of the query, or the metadata (numDocsScanned)? There was a bug in older version of star Tree index with OR that was resolved in 0.11, so you may want to also double check the version you are running.
h
Hi @Mayank, 985 is the number of docs returned. For the 3rd query, useStarTree=false and useStarTree=true give the same number of returned docs, a big number 19960856, which is different from the number I got earlier when I posted the queries here. I’ll let the team double check the Pinot version and see if the problem still exists.
@Mayank This may help with debugging, or help recall some fix or workaround. The trigger of the issue might be the udf. Without the udf, this query gets the right result.
Copy code
select count("timestamp") from api_requests where (("timestamp">=1662946380000 and "timestamp"<=1662946440000) or ("timestamp">=1662946450000 and "timestamp"<=1662946480000)) and entity_id in ('8a3688f4-6390-0e77-0163-d010adc10427')
m
Can you post the broker response for count(*) including the metadata?
cc: @Jackie
h
@Mayank, @Jackie 1
Copy code
select count(*) from api_requests where ("timestamp" between 1662946380000 and 1662946440000 or add("timestamp", latency) between 1662946380000 and 1662946440000) and entity_id='8a3688f4-6390-0e77-0163-d010adc10427' option(useStarTree=false)

{
  "resultTable": {
    "dataSchema": {
      "columnNames": [
        "count(*)"
      ],
      "columnDataTypes": [
        "LONG"
      ]
    },
    "rows": [
      [
        20816583
      ]
    ]
  },
  "exceptions": [],
  "numServersQueried": 12,
  "numServersResponded": 12,
  "numSegmentsQueried": 602,
  "numSegmentsProcessed": 461,
  "numSegmentsMatched": 74,
  "numConsumingSegmentsQueried": 6,
  "numDocsScanned": 20816583,
  "numEntriesScannedInFilter": 793954087,
  "numEntriesScannedPostFilter": 0,
  "numGroupsLimitReached": false,
  "totalDocs": 535530036,
  "timeUsedMs": 1576,
  "offlineThreadCpuTimeNs": 0,
  "realtimeThreadCpuTimeNs": 0,
  "offlineSystemActivitiesCpuTimeNs": 0,
  "realtimeSystemActivitiesCpuTimeNs": 0,
  "offlineResponseSerializationCpuTimeNs": 0,
  "realtimeResponseSerializationCpuTimeNs": 0,
  "offlineTotalCpuTimeNs": 0,
  "realtimeTotalCpuTimeNs": 0,
  "segmentStatistics": [],
  "traceInfo": {},
  "minConsumingFreshnessTimeMs": 1663635025707,
  "numSegmentsPrunedByBroker": 0,
  "numSegmentsPrunedByServer": 141,
  "numSegmentsPrunedInvalid": 0,
  "numSegmentsPrunedByLimit": 0,
  "numSegmentsPrunedByValue": 141,
  "explainPlanNumEmptyFilterSegments": 0,
  "explainPlanNumMatchAllFilterSegments": 0,
  "numRowsResultSet": 1
}
2
Copy code
select count(*) from api_requests where ("timestamp" between 1662946380000 and 1662946440000 or add("timestamp", latency) between 1662946380000 and 1662946440000) and entity_id='8a3688f4-6390-0e77-0163-d010adc10427' option(useStarTree=true)

{
  "resultTable": {
    "dataSchema": {
      "columnNames": [
        "count(*)"
      ],
      "columnDataTypes": [
        "LONG"
      ]
    },
    "rows": [
      [
        20816583
      ]
    ]
  },
  "exceptions": [],
  "numServersQueried": 12,
  "numServersResponded": 12,
  "numSegmentsQueried": 602,
  "numSegmentsProcessed": 461,
  "numSegmentsMatched": 74,
  "numConsumingSegmentsQueried": 6,
  "numDocsScanned": 20816583,
  "numEntriesScannedInFilter": 793991495,
  "numEntriesScannedPostFilter": 0,
  "numGroupsLimitReached": false,
  "totalDocs": 535547324,
  "timeUsedMs": 1475,
  "offlineThreadCpuTimeNs": 0,
  "realtimeThreadCpuTimeNs": 0,
  "offlineSystemActivitiesCpuTimeNs": 0,
  "realtimeSystemActivitiesCpuTimeNs": 0,
  "offlineResponseSerializationCpuTimeNs": 0,
  "realtimeResponseSerializationCpuTimeNs": 0,
  "offlineTotalCpuTimeNs": 0,
  "realtimeTotalCpuTimeNs": 0,
  "segmentStatistics": [],
  "traceInfo": {},
  "minConsumingFreshnessTimeMs": 1663635145589,
  "numSegmentsPrunedByBroker": 0,
  "numSegmentsPrunedByServer": 141,
  "numSegmentsPrunedInvalid": 0,
  "numSegmentsPrunedByLimit": 0,
  "numSegmentsPrunedByValue": 141,
  "explainPlanNumEmptyFilterSegments": 0,
  "explainPlanNumMatchAllFilterSegments": 0,
  "numRowsResultSet": 1
}
3
Copy code
select count(*) from api_requests where ("timestamp" between 1662946380000 and 1662946440000) and entity_id='8a3688f4-6390-0e77-0163-d010adc10427'

{
  "resultTable": {
    "dataSchema": {
      "columnNames": [
        "count(*)"
      ],
      "columnDataTypes": [
        "LONG"
      ]
    },
    "rows": [
      [
        985
      ]
    ]
  },
  "exceptions": [],
  "numServersQueried": 12,
  "numServersResponded": 12,
  "numSegmentsQueried": 602,
  "numSegmentsProcessed": 1,
  "numSegmentsMatched": 1,
  "numConsumingSegmentsQueried": 6,
  "numDocsScanned": 985,
  "numEntriesScannedInFilter": 4685,
  "numEntriesScannedPostFilter": 0,
  "numGroupsLimitReached": false,
  "totalDocs": 535563431,
  "timeUsedMs": 83,
  "offlineThreadCpuTimeNs": 0,
  "realtimeThreadCpuTimeNs": 0,
  "offlineSystemActivitiesCpuTimeNs": 0,
  "realtimeSystemActivitiesCpuTimeNs": 0,
  "offlineResponseSerializationCpuTimeNs": 0,
  "realtimeResponseSerializationCpuTimeNs": 0,
  "offlineTotalCpuTimeNs": 0,
  "realtimeTotalCpuTimeNs": 0,
  "segmentStatistics": [],
  "traceInfo": {},
  "minConsumingFreshnessTimeMs": 1663635265752,
  "numSegmentsPrunedByBroker": 0,
  "numSegmentsPrunedByServer": 601,
  "numSegmentsPrunedInvalid": 0,
  "numSegmentsPrunedByLimit": 0,
  "numSegmentsPrunedByValue": 601,
  "explainPlanNumEmptyFilterSegments": 0,
  "explainPlanNumMatchAllFilterSegments": 0,
  "numRowsResultSet": 1
}
4
Copy code
select count(*) from api_requests where add("timestamp", latency) between 1662946380000 and 1662946440000 and entity_id='8a3688f4-6390-0e77-0163-d010adc10427'

{
  "resultTable": {
    "dataSchema": {
      "columnNames": [
        "count(*)"
      ],
      "columnDataTypes": [
        "LONG"
      ]
    },
    "rows": [
      [
        985
      ]
    ]
  },
  "exceptions": [],
  "numServersQueried": 12,
  "numServersResponded": 12,
  "numSegmentsQueried": 602,
  "numSegmentsProcessed": 461,
  "numSegmentsMatched": 1,
  "numConsumingSegmentsQueried": 6,
  "numDocsScanned": 985,
  "numEntriesScannedInFilter": 495874898,
  "numEntriesScannedPostFilter": 0,
  "numGroupsLimitReached": false,
  "totalDocs": 535571309,
  "timeUsedMs": 1017,
  "offlineThreadCpuTimeNs": 0,
  "realtimeThreadCpuTimeNs": 0,
  "offlineSystemActivitiesCpuTimeNs": 0,
  "realtimeSystemActivitiesCpuTimeNs": 0,
  "offlineResponseSerializationCpuTimeNs": 0,
  "realtimeResponseSerializationCpuTimeNs": 0,
  "offlineTotalCpuTimeNs": 0,
  "realtimeTotalCpuTimeNs": 0,
  "segmentStatistics": [],
  "traceInfo": {},
  "minConsumingFreshnessTimeMs": 1663635325726,
  "numSegmentsPrunedByBroker": 0,
  "numSegmentsPrunedByServer": 141,
  "numSegmentsPrunedInvalid": 0,
  "numSegmentsPrunedByLimit": 0,
  "numSegmentsPrunedByValue": 141,
  "explainPlanNumEmptyFilterSegments": 0,
  "explainPlanNumMatchAllFilterSegments": 0,
  "numRowsResultSet": 1
}
m
Will review
h
Thank you.
j
Can you please try
select count(*) from api_requests where entity_id = '8a3688f4-6390-0e77-0163-d010adc10427'
?
All these queries are not using star-tree though
Can you also try
select count(*) from api_requests where (("timestamp" * 1) between 1662946380000 and 1662946440000) and entity_id = '8a3688f4-6390-0e77-0163-d010adc10427'
h
Copy code
select count(*) from api_requests where entity_id='8a3688f4-6390-0e77-0163-d010adc10427'

{
  "resultTable": {
    "dataSchema": {
      "columnNames": [
        "count(*)"
      ],
      "columnDataTypes": [
        "LONG"
      ]
    },
    "rows": [
      [
        157008993
      ]
    ]
  },
  "exceptions": [],
  "numServersQueried": 12,
  "numServersResponded": 12,
  "numSegmentsQueried": 594,
  "numSegmentsProcessed": 456,
  "numSegmentsMatched": 456,
  "numConsumingSegmentsQueried": 6,
  "numDocsScanned": 157008993,
  "numEntriesScannedInFilter": 340854806,
  "numEntriesScannedPostFilter": 0,
  "numGroupsLimitReached": false,
  "totalDocs": 534401241,
  "timeUsedMs": 539,
  "offlineThreadCpuTimeNs": 0,
  "realtimeThreadCpuTimeNs": 0,
  "offlineSystemActivitiesCpuTimeNs": 0,
  "realtimeSystemActivitiesCpuTimeNs": 0,
  "offlineResponseSerializationCpuTimeNs": 0,
  "realtimeResponseSerializationCpuTimeNs": 0,
  "offlineTotalCpuTimeNs": 0,
  "realtimeTotalCpuTimeNs": 0,
  "segmentStatistics": [],
  "traceInfo": {},
  "minConsumingFreshnessTimeMs": 1663642885838,
  "numSegmentsPrunedByBroker": 0,
  "numSegmentsPrunedByServer": 138,
  "numSegmentsPrunedInvalid": 0,
  "numSegmentsPrunedByLimit": 0,
  "numSegmentsPrunedByValue": 138,
  "explainPlanNumEmptyFilterSegments": 0,
  "explainPlanNumMatchAllFilterSegments": 0,
  "numRowsResultSet": 1
}
and
Copy code
select count(*) from api_requests where (("timestamp" * 1) between 1662946380000 and 1662946440000) and entity_id = '8a3688f4-6390-0e77-0163-d010adc10427'

{
  "resultTable": {
    "dataSchema": {
      "columnNames": [
        "count(*)"
      ],
      "columnDataTypes": [
        "LONG"
      ]
    },
    "rows": [
      [
        985
      ]
    ]
  },
  "exceptions": [],
  "numServersQueried": 12,
  "numServersResponded": 12,
  "numSegmentsQueried": 594,
  "numSegmentsProcessed": 456,
  "numSegmentsMatched": 1,
  "numConsumingSegmentsQueried": 6,
  "numDocsScanned": 985,
  "numEntriesScannedInFilter": 497863799,
  "numEntriesScannedPostFilter": 0,
  "numGroupsLimitReached": false,
  "totalDocs": 534401241,
  "timeUsedMs": 924,
  "offlineThreadCpuTimeNs": 0,
  "realtimeThreadCpuTimeNs": 0,
  "offlineSystemActivitiesCpuTimeNs": 0,
  "realtimeSystemActivitiesCpuTimeNs": 0,
  "offlineResponseSerializationCpuTimeNs": 0,
  "realtimeResponseSerializationCpuTimeNs": 0,
  "offlineTotalCpuTimeNs": 0,
  "realtimeTotalCpuTimeNs": 0,
  "segmentStatistics": [],
  "traceInfo": {},
  "minConsumingFreshnessTimeMs": 1663642885838,
  "numSegmentsPrunedByBroker": 0,
  "numSegmentsPrunedByServer": 138,
  "numSegmentsPrunedInvalid": 0,
  "numSegmentsPrunedByLimit": 0,
  "numSegmentsPrunedByValue": 138,
  "explainPlanNumEmptyFilterSegments": 0,
  "explainPlanNumMatchAllFilterSegments": 0,
  "numRowsResultSet": 1
}
@Jackie See the above.
j
Thanks! Will investigate and get back to you
Can you also try this one
select count(*) from api_requests where (("timestamp" * 1) between <tel:1662946380000|1662946380000> and <tel:1662946440000|1662946440000> or ("timestamp" + latency) between <tel:1662946380000|1662946380000> and <tel:1662946440000|1662946440000>) and entity_id = '8a3688f4-6390-0e77-0163-d010adc10427'
h
@Jackie
Copy code
select count(*) from api_requests where (("timestamp" * 1) between 1662946380000 and 1662946440000 or ("timestamp" + latency) between 1662946380000 and 1662946440000) and entity_id = '8a3688f4-6390-0e77-0163-d010adc10427'

{
  "resultTable": {
    "dataSchema": {
      "columnNames": [
        "count(*)"
      ],
      "columnDataTypes": [
        "LONG"
      ]
    },
    "rows": [
      [
        986
      ]
    ]
  },
  "exceptions": [],
  "numServersQueried": 12,
  "numServersResponded": 12,
  "numSegmentsQueried": 594,
  "numSegmentsProcessed": 456,
  "numSegmentsMatched": 1,
  "numConsumingSegmentsQueried": 6,
  "numDocsScanned": 986,
  "numEntriesScannedInFilter": 1251914546,
  "numEntriesScannedPostFilter": 0,
  "numGroupsLimitReached": false,
  "totalDocs": 534626462,
  "timeUsedMs": 1818,
  "offlineThreadCpuTimeNs": 0,
  "realtimeThreadCpuTimeNs": 0,
  "offlineSystemActivitiesCpuTimeNs": 0,
  "realtimeSystemActivitiesCpuTimeNs": 0,
  "offlineResponseSerializationCpuTimeNs": 0,
  "realtimeResponseSerializationCpuTimeNs": 0,
  "offlineTotalCpuTimeNs": 0,
  "realtimeTotalCpuTimeNs": 0,
  "segmentStatistics": [],
  "traceInfo": {},
  "minConsumingFreshnessTimeMs": 1663644805701,
  "numSegmentsPrunedByBroker": 0,
  "numSegmentsPrunedByServer": 138,
  "numSegmentsPrunedInvalid": 0,
  "numSegmentsPrunedByLimit": 0,
  "numSegmentsPrunedByValue": 138,
  "explainPlanNumEmptyFilterSegments": 0,
  "explainPlanNumMatchAllFilterSegments": 0,
  "numRowsResultSet": 1
}
j
I tried a similar query locally but didn't reproduce the issue. Can you run the problematic query with
Tracing
enabled?
h
@Jackie The problematic query
select count(*) from api_requests where (("timestamp">=1662946380000 and "timestamp"<=1662946440000) or (add("timestamp", latency)<=1662946440000 and add("timestamp", latency)>=1662946380000)) and entity_id='8a3688f4-6390-0e77-0163-d010adc10427'
with
Tracing
enabled, I get NPE and ConcurrentModificationException
Copy code
[
  {
    "message": "InternalError:\njava.lang.NullPointerException\n\tat org.apache.pinot.core.util.trace.TraceContext.getTraceInfo(TraceContext.java:192)\n\tat org.apache.pinot.core.query.executor.ServerQueryExecutorV1Impl.processQueryInternal(ServerQueryExecutorV1Impl.java:239)\n\tat org.apache.pinot.core.query.executor.ServerQueryExecutorV1Impl.processQuery(ServerQueryExecutorV1Impl.java:134)\n\tat org.apache.pinot.core.query.executor.QueryExecutor.processQuery(QueryExecutor.java:60)",
    "errorCode": 450
  },
  {
    "message": "InternalError:\njava.util.ConcurrentModificationException\n\tat java.base/java.util.ArrayList$Itr.checkForComodification(ArrayList.java:1043)\n\tat java.base/java.util.ArrayList$Itr.next(ArrayList.java:997)\n\tat org.apache.pinot.core.util.trace.TraceContext$Trace.toJson(TraceContext.java:91)\n\tat org.apache.pinot.core.util.trace.TraceContext.getTraceInfo(TraceContext.java:193)",
    "errorCode": 450
  },
  {
    "message": "InternalError:\njava.lang.NullPointerException\n\tat org.apache.pinot.core.util.trace.TraceContext.getTraceInfo(TraceContext.java:192)\n\tat org.apache.pinot.core.query.executor.ServerQueryExecutorV1Impl.processQueryInternal(ServerQueryExecutorV1Impl.java:239)\n\tat org.apache.pinot.core.query.executor.ServerQueryExecutorV1Impl.processQuery(ServerQueryExecutorV1Impl.java:134)\n\tat org.apache.pinot.core.query.executor.QueryExecutor.processQuery(QueryExecutor.java:60)",
    "errorCode": 450
  },
  {
    "message": "InternalError:\njava.util.ConcurrentModificationException\n\tat java.base/java.util.ArrayList$Itr.checkForComodification(ArrayList.java:1043)\n\tat java.base/java.util.ArrayList$Itr.next(ArrayList.java:997)\n\tat org.apache.pinot.core.util.trace.TraceContext$Trace.toJson(TraceContext.java:91)\n\tat org.apache.pinot.core.util.trace.TraceContext.getTraceInfo(TraceContext.java:193)",
    "errorCode": 450
  },
  {
    "message": "InternalError:\njava.lang.NullPointerException\n\tat org.apache.pinot.core.util.trace.TraceContext.getTraceInfo(TraceContext.java:192)\n\tat org.apache.pinot.core.query.executor.ServerQueryExecutorV1Impl.processQueryInternal(ServerQueryExecutorV1Impl.java:239)\n\tat org.apache.pinot.core.query.executor.ServerQueryExecutorV1Impl.processQuery(ServerQueryExecutorV1Impl.java:134)\n\tat org.apache.pinot.core.query.executor.QueryExecutor.processQuery(QueryExecutor.java:60)",
    "errorCode": 450
  },
  {
    "message": "InternalError:\njava.lang.NullPointerException\n\tat org.apache.pinot.core.util.trace.TraceContext.getTraceInfo(TraceContext.java:192)\n\tat org.apache.pinot.core.query.executor.ServerQueryExecutorV1Impl.processQueryInternal(ServerQueryExecutorV1Impl.java:239)\n\tat org.apache.pinot.core.query.executor.ServerQueryExecutorV1Impl.processQuery(ServerQueryExecutorV1Impl.java:134)\n\tat org.apache.pinot.core.query.executor.QueryExecutor.processQuery(QueryExecutor.java:60)",
    "errorCode": 450
  },
  {
    "message": "InternalError:\njava.util.ConcurrentModificationException\n\tat java.base/java.util.ArrayList$Itr.checkForComodification(ArrayList.java:1043)\n\tat java.base/java.util.ArrayList$Itr.next(ArrayList.java:997)\n\tat org.apache.pinot.core.util.trace.TraceContext$Trace.toJson(TraceContext.java:91)\n\tat org.apache.pinot.core.util.trace.TraceContext.getTraceInfo(TraceContext.java:193)",
    "errorCode": 450
  },
  {
    "message": "InternalError:\njava.util.ConcurrentModificationException\n\tat java.base/java.util.ArrayList$Itr.checkForComodification(ArrayList.java:1043)\n\tat java.base/java.util.ArrayList$Itr.next(ArrayList.java:997)\n\tat org.apache.pinot.core.util.trace.TraceContext$Trace.toJson(TraceContext.java:91)\n\tat org.apache.pinot.core.util.trace.TraceContext.getTraceInfo(TraceContext.java:193)",
    "errorCode": 450
  },
  {
    "message": "InternalError:\njava.lang.NullPointerException\n\tat org.apache.pinot.core.util.trace.TraceContext.getTraceInfo(TraceContext.java:192)\n\tat org.apache.pinot.core.query.executor.ServerQueryExecutorV1Impl.processQueryInternal(ServerQueryExecutorV1Impl.java:239)\n\tat org.apache.pinot.core.query.executor.ServerQueryExecutorV1Impl.processQuery(ServerQueryExecutorV1Impl.java:134)\n\tat org.apache.pinot.core.query.executor.QueryExecutor.processQuery(QueryExecutor.java:60)",
    "errorCode": 450
  },
  {
    "message": "InternalError:\njava.lang.NullPointerException\n\tat org.apache.pinot.core.util.trace.TraceContext.getTraceInfo(TraceContext.java:192)\n\tat org.apache.pinot.core.query.executor.ServerQueryExecutorV1Impl.processQueryInternal(ServerQueryExecutorV1Impl.java:239)\n\tat org.apache.pinot.core.query.executor.ServerQueryExecutorV1Impl.processQuery(ServerQueryExecutorV1Impl.java:134)\n\tat org.apache.pinot.core.query.executor.QueryExecutor.processQuery(QueryExecutor.java:60)",
    "errorCode": 450
  },
  {
    "message": "InternalError:\njava.util.ConcurrentModificationException\n\tat java.base/java.util.ArrayList$Itr.checkForComodification(ArrayList.java:1043)\n\tat java.base/java.util.ArrayList$Itr.next(ArrayList.java:997)\n\tat org.apache.pinot.core.util.trace.TraceContext$Trace.toJson(TraceContext.java:91)\n\tat org.apache.pinot.core.util.trace.TraceContext.getTraceInfo(TraceContext.java:193)",
    "errorCode": 450
  }
]
with
Tracing
disabled, I get the way larger number of docs
Copy code
{
  "resultTable": {
    "dataSchema": {
      "columnNames": [
        "count(*)"
      ],
      "columnDataTypes": [
        "LONG"
      ]
    },
    "rows": [
      [
        21134510
      ]
    ]
  },
  "exceptions": [],
  "numServersQueried": 12,
  "numServersResponded": 12,
  "numSegmentsQueried": 603,
  "numSegmentsProcessed": 459,
  "numSegmentsMatched": 72,
  "numConsumingSegmentsQueried": 6,
  "numDocsScanned": 21134510,
  "numEntriesScannedInFilter": 810370688,
  "numEntriesScannedPostFilter": 0,
  "numGroupsLimitReached": false,
  "totalDocs": 541415453,
  "timeUsedMs": 1687,
  "offlineThreadCpuTimeNs": 0,
  "realtimeThreadCpuTimeNs": 0,
  "offlineSystemActivitiesCpuTimeNs": 0,
  "realtimeSystemActivitiesCpuTimeNs": 0,
  "offlineResponseSerializationCpuTimeNs": 0,
  "realtimeResponseSerializationCpuTimeNs": 0,
  "offlineTotalCpuTimeNs": 0,
  "realtimeTotalCpuTimeNs": 0,
  "segmentStatistics": [],
  "traceInfo": {},
  "minConsumingFreshnessTimeMs": 1663683445812,
  "numSegmentsPrunedByBroker": 0,
  "numSegmentsPrunedByServer": 144,
  "numSegmentsPrunedInvalid": 0,
  "numSegmentsPrunedByLimit": 0,
  "numSegmentsPrunedByValue": 144,
  "explainPlanNumEmptyFilterSegments": 0,
  "explainPlanNumMatchAllFilterSegments": 0,
  "numRowsResultSet": 1
}
j
@Huaqiang He Can we have a live debugging session over zoom? I think I'll need more context to debug the problem
h
Sure, I think I’ll give you my early morning if you’re in the west. That can be this Saturday morning in BJ or Friday. Does it work for you?
@Jackie
Or if you want it to be my next morning?
j
We can do it now if you are available
h
give me 10 mins
@Jackie how about now?
r
@Huaqiang He can you post explain plans for the queries (
explain plan for <query>
)?