Hi <@UGRJA9TEH> <@U027NCEJKV4>, I have a trouble ...
# troubleshooting
s
Hi @Xiang Fu @Xiaobing, I'm having trouble loading data from S3 into a Pinot offline table. Sharing the table config and the segment that was created.
Copy code
{
  "OFFLINE": {
    "tableName": "test_transcript_OFFLINE",
    "tableType": "OFFLINE",
    "segmentsConfig": {
      "schemaName": "test_transcript",
      "replication": "1",
      "timeColumnName": "timestamp",
      "segmentPushFrequency": "HOURLY",
      "segmentPushType": "APPEND",
      "replicasPerPartition": "1"
    },
    "tenants": {
      "broker": "DefaultTenant",
      "server": "DefaultTenant"
    },
    "tableIndexConfig": {
      "invertedIndexColumns": [],
      "noDictionaryColumns": [],
      "rangeIndexColumns": [],
      "rangeIndexVersion": 2,
      "autoGeneratedInvertedIndex": false,
      "createInvertedIndexDuringSegmentGeneration": false,
      "sortedColumn": [],
      "bloomFilterColumns": [],
      "loadMode": "MMAP",
      "onHeapDictionaryColumns": [],
      "varLengthDictionaryColumns": [],
      "enableDefaultStarTree": false,
      "enableDynamicStarTreeCreation": false,
      "aggregateMetrics": false,
      "nullHandlingEnabled": false
    },
    "metadata": {},
    "quota": {},
    "task": {
      "taskTypeConfigsMap": {
        "SegmentGenerationAndPushTask": {
          "schedule": "/5 * * * * ?",
          "tableMaxNumTasks": "10"
        }
      }
    },
    "routing": {},
    "query": {},
    "ingestionConfig": {
      "batchIngestionConfig": {
        "batchConfigMaps": [
          {
            "input.fs.className": "org.apache.pinot.plugin.filesystem.S3PinotFS",
            "input.fs.prop.region": "us-east-1",
            "input.fs.prop.secretKey": "*****",
            "input.fs.prop.accessKey": "*****",
            "inputDirURI": "s3://pp-airflow-qa/dremio_test_files/jsonfiles/",
            "includeFileNamePattern": "glob:**/*.json",
            "excludeFileNamePattern": "glob:**/*.tmp",
            "inputFormat": "json"
          }
        ],
        "segmentIngestionType": "APPEND",
        "segmentIngestionFrequency": "HOURLY"
      }
    },
    "isDimTable": false
  }
}
1
k
as if a segment was a file
k
Can you mask the S3 keys in these properties so that they are not visible to others? Secondly, how many records are there in that one segment?
s
12 records
k
Ok. And how many files, and how many records per file, are present in the inputDirURI directory?
s
3 files each with 12 records in it
k
Ok, and does each of these files contain exactly the same records?
s
No, the data is different,
but they all have the same timestamp column.
k
yep, that's the problem
s
okay
k
Copy code
"segmentNameGenerator.type": "inputFile",
"segmentNameGenerator.configs.file.path.pattern": ".+/(.+)\\.json",
"segmentNameGenerator.configs.segment.name.template": "${filePathPattern:\\1}"
Add this inside batchConfigMaps
file names need to be different though
s
Sure, I'll make the changes.