Hi, Pinot team!
I am testing an ingestion job using 'org.apache.pinot.plugin.inputformat.parquet.ParquetRecordReader'.
My problem is that the binary type in the Parquet file is not converted to the STRING type in Pinot.
The data returned by a query in Pinot looks like a hex string (for example, "4e6174616c6965" instead of "Natalie").
What's the problem?
-- query result
"resultTable": {
"dataSchema": {
"columnNames": [
"firstname", "gender", "lastname", "score", "studentid", "subject", "timestampinepoch"
],
"columnDataTypes": [
"STRING", "STRING", "STRING", "INT", "INT", "STRING", "LONG"
]
},
"rows": [
[
"4e6174616c6965", "46656d616c65", "4a6f6e6573", 3, 109, "4d61746873", 1647980000000
]
]
},
-- parquet file schema
parquet-tools schema 6a4e9212ba501d90-c3a971300000000_1596454343_data.0.parq
message schema {
optional int32 studentid;
optional binary firstname;
optional binary lastname;
optional binary gender;
optional binary subject;
optional int32 score;
optional int64 timestampinepoch;
}
-- ingestion job log
read value: {"studentid": 109, "firstname": "Natalie", "lastname": "Jones", "gender": "Female", "subject": "Maths", "score": 3, "timestampinepoch": 1647980000000}
Start building IndexCreator!
Finished records indexing in IndexCreator!
FileName set to metadata.properties
Base path set to /tmp/pinot-6a2e3b81-8eda-40c9-9a53-0d9cc03c85fd/output/tmp-8b8f9c6b-6a22-41d9-a16d-3eefe3d75d81
Finished segment seal!
Converting segment: /tmp/pinot-6a2e3b81-8eda-40c9-9a53-0d9cc03c85fd/output/batch_2022-03-22_2022-03-22 to v3 format
FileName set to metadata.properties
-- table
{
"tableName": "transcript",
"tableType": "OFFLINE",
"segmentsConfig": {
"schemaName": "transcript",
"replication": 3,
"timeColumnName": "timestampinepoch",
"timeType": "MILLISECONDS"
},
"tenants": { "broker":"DefaultTenant", "server":"DefaultTenant" },
"tableIndexConfig": {
"loadMode": "MMAP"
},
"ingestionConfig": {
"batchIngestionConfig": {
"segmentIngestionType": "APPEND",
"segmentPushFrequency": "DAILY"
}
},
"metadata": {}
}
-- schema
{
"schemaName": "transcript",
"dimensionFieldSpecs": [
{ "name": "studentid", "dataType": "INT" },
{ "name": "firstname", "dataType": "STRING" },
{ "name": "lastname", "dataType": "STRING" },
{ "name": "gender", "dataType": "STRING" },
{ "name": "subject", "dataType": "STRING" }
],
"metricFieldSpecs": [
{ "name": "score", "dataType": "INT" }
],
"dateTimeFieldSpecs": [{
"name": "timestampinepoch",
"dataType": "LONG",
"format" : "1:MILLISECONDS:EPOCH",
"granularity": "1:MILLISECONDS"
}
]
}