Wojciech Wasik
09/30/2022, 12:37 PMcsv
file. I have the following configs
Wojciech Wasik
09/30/2022, 12:37 PM{
"tableName": "transactions",
"tableType": "OFFLINE",
"segmentsConfig": {
"replication": 1,
"timeColumnName": "Timestamp",
"schemaName": "transactions"
},
"tenants": {},
"tableIndexConfig": {
"loadMode": "MMAP"
},
"ingestionConfig": {
"batchIngestionConfig": {
"segmentIngestionType": "APPEND",
"segmentIngestionFrequency": "DAILY"
},
"transformConfigs": [
{
"columnName": "Timestamp",
"transformFunction": "fromDateTime(dateTimeStr, 'yyyy-MM-dd''T''HH:mm:ss''Z')"
}
]
},
"metadata": {}
}
Wojciech Wasik
09/30/2022, 12:38 PM{
"schemaName": "transactions",
"metricFieldSpecs": [
{
"name": "TakerAmount",
"dataType": "DOUBLE"
},
{
"name": "TakerVolumeUSD",
"dataType": "DOUBLE"
},
{
"name": "MakerAmount",
"dataType": "DOUBLE"
},
{
"name": "MakerVolumeUSD",
"dataType": "DOUBLE"
},
{
"name": "GasLimit",
"dataType": "LONG"
},
{
"name": "GasUsed",
"dataType": "LONG"
},
{
"name": "GasPrice",
"dataType": "LONG"
},
{
"name": "GasFees",
"dataType": "DOUBLE"
},
{
"name": "TipGasFees",
"dataType": "DOUBLE"
},
{
"name": "BurntGasFees",
"dataType": "DOUBLE"
},
{
"name": "ReimbursedGasFees",
"dataType": "DOUBLE"
},
{
"name": "GasFeesUSD",
"dataType": "DOUBLE"
},
{
"name": "TipGasFeesUSD",
"dataType": "DOUBLE"
},
{
"name": "BurntGasFeesUSD",
"dataType": "DOUBLE"
},
{
"name": "ReimbursedGasFeesUSD",
"dataType": "DOUBLE"
},
{
"name": "TakerTokenPriceUSD",
"dataType": "DOUBLE"
},
{
"name": "MakerTokenPriceUSD",
"dataType": "DOUBLE"
},
{
"name": "VolumeUSD",
"dataType": "DOUBLE"
}
],
"dimensionFieldSpecs": [
{
"name": "TransactionHash",
"dataType": "STRING"
},
{
"name": "BlockNumber",
"dataType": "LONG"
},
{
"name": "ChainName",
"dataType": "STRING"
},
{
"name": "TransactionFrom",
"dataType": "STRING"
},
{
"name": "TransactionTo",
"dataType": "STRING"
},
{
"name": "Affiliate",
"dataType": "STRING"
},
{
"name": "FeeRecipient",
"dataType": "STRING"
},
{
"name": "Taker",
"dataType": "STRING"
},
{
"name": "Maker",
"dataType": "STRING"
},
{
"name": "LiquiditySource",
"dataType": "STRING"
},
{
"name": "App",
"dataType": "STRING"
},
{
"name": "Router",
"dataType": "STRING"
},
{
"name": "TakerToken",
"dataType": "STRING"
},
{
"name": "TakerTokenSymbol",
"dataType": "STRING"
},
{
"name": "MakerToken",
"dataType": "STRING"
},
{
"name": "MakerTokenSymbol",
"dataType": "STRING"
},
{
"name": "IsGasless",
"dataType": "BOOLEAN"
},
{
"name": "IsMultihop",
"dataType": "BOOLEAN"
},
{
"name": "IsMultiplex",
"dataType": "BOOLEAN"
},
{
"name": "HasRFQ",
"dataType": "BOOLEAN"
},
{
"name": "HasLimitOrder",
"dataType": "BOOLEAN"
},
{
"name": "HasDirect",
"dataType": "BOOLEAN"
},
{
"name": "NativeOrderType",
"dataType": "STRING"
},
{
"name": "TransformerFeeRecipient",
"dataType": "STRING"
},
{
"name": "TransformerFeeToken",
"dataType": "STRING"
},
{
"name": "TransformerFeeTokenSymbol",
"dataType": "STRING"
},
{
"name": "TransformerFeeTokenAmount",
"dataType": "STRING"
},
{
"name": "TransformerFeeVolumeUSD",
"dataType": "STRING"
},
{
"name": "CalledFunction",
"dataType": "STRING"
},
{
"name": "MaxFeePerGas",
"dataType": "STRING"
},
{
"name": "MaxPriorityFeePerGas",
"dataType": "STRING"
},
{
"name": "BaseFeePerGas",
"dataType": "STRING"
},
{
"name": "Type",
"dataType": "INT"
}
],
"dateTimeFieldSpecs": [
{
"name": "Timestamp",
"dataType": "TIMESTAMP",
"format": "1:MILLISECONDS:EPOCH",
"granularity": "1:DAYS"
}
]
}
Wojciech Wasik
09/30/2022, 12:38 PMCaused by: java.lang.NumberFormatException: For input string: "2022-09-08T22:22:09Z"
Wojciech Wasik
09/30/2022, 12:39 PM"2022-09-08T22:22:09Z"
Wojciech Wasik
09/30/2022, 12:39 PMNeha Pawar
Wojciech Wasik
09/30/2022, 6:20 PMjava.lang.RuntimeException: Caught exception during running - org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner
at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:152)
at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.runIngestionJob(IngestionJobLauncher.java:121)
at org.apache.pinot.tools.admin.command.LaunchDataIngestionJobCommand.execute(LaunchDataIngestionJobCommand.java:130)
at org.apache.pinot.tools.Command.call(Command.java:33)
at org.apache.pinot.tools.Command.call(Command.java:29)
at picocli.CommandLine.executeUserObject(CommandLine.java:1953)
at picocli.CommandLine.access$1300(CommandLine.java:145)
at picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352)
at picocli.CommandLine$RunLast.handle(CommandLine.java:2346)
at picocli.CommandLine$RunLast.handle(CommandLine.java:2311)
at picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:2179)
at picocli.CommandLine.execute(CommandLine.java:2078)
at org.apache.pinot.tools.admin.PinotAdministrator.execute(PinotAdministrator.java:167)
at org.apache.pinot.tools.admin.PinotAdministrator.main(PinotAdministrator.java:198)
Caused by: java.lang.RuntimeException: Failed to generate Pinot segment for file - <s3://0x-wojciech/transactions-small.csv>
at org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner.lambda$submitSegmentGenTask$1(SegmentGenerationJobRunner.java:286)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.lang.RuntimeException: Caught exception while transforming data type for column: Timestamp
at org.apache.pinot.segment.local.recordtransformer.DataTypeTransformer.transform(DataTypeTransformer.java:146)
at org.apache.pinot.segment.local.recordtransformer.CompositeTransformer.transform(CompositeTransformer.java:83)
at org.apache.pinot.segment.local.segment.creator.TransformPipeline.processPlainRow(TransformPipeline.java:97)
at org.apache.pinot.segment.local.segment.creator.TransformPipeline.processRow(TransformPipeline.java:92)
at org.apache.pinot.segment.local.segment.creator.RecordReaderSegmentCreationDataSource.gatherStats(RecordReaderSegmentCreationDataSource.java:67)
at org.apache.pinot.segment.local.segment.creator.RecordReaderSegmentCreationDataSource.gatherStats(RecordReaderSegmentCreationDataSource.java:37)
at org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl.init(SegmentIndexCreationDriverImpl.java:181)
at org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl.init(SegmentIndexCreationDriverImpl.java:153)
at org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl.init(SegmentIndexCreationDriverImpl.java:102)
at org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationTaskRunner.run(SegmentGenerationTaskRunner.java:118)
at org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner.lambda$submitSegmentGenTask$1(SegmentGenerationJobRunner.java:264)
... 5 more
Caused by: java.lang.NumberFormatException: For input string: "2022-09-08T22:22:09Z"
at java.base/java.lang.NumberFormatException.forInputString(NumberFormatException.java:65)
at java.base/java.lang.Long.parseLong(Long.java:692)
at java.base/java.lang.Long.parseLong(Long.java:817)
at org.apache.pinot.spi.data.DateTimeFormatSpec.fromFormatToMillis(DateTimeFormatSpec.java:300)
at org.apache.pinot.segment.local.recordtransformer.DataTypeTransformer.transform(DataTypeTransformer.java:94)
... 15 more
Wojciech Wasik
09/30/2022, 6:21 PMjava.lang.RuntimeException: Caught exception during running - org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner
at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.kickoffIngestionJob(IngestionJobLauncher.java:152)
at org.apache.pinot.spi.ingestion.batch.IngestionJobLauncher.runIngestionJob(IngestionJobLauncher.java:121)
at org.apache.pinot.tools.admin.command.LaunchDataIngestionJobCommand.execute(LaunchDataIngestionJobCommand.java:130)
at org.apache.pinot.tools.Command.call(Command.java:33)
at org.apache.pinot.tools.Command.call(Command.java:29)
at picocli.CommandLine.executeUserObject(CommandLine.java:1953)
at picocli.CommandLine.access$1300(CommandLine.java:145)
at picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352)
at picocli.CommandLine$RunLast.handle(CommandLine.java:2346)
at picocli.CommandLine$RunLast.handle(CommandLine.java:2311)
at picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:2179)
at picocli.CommandLine.execute(CommandLine.java:2078)
at org.apache.pinot.tools.admin.PinotAdministrator.execute(PinotAdministrator.java:167)
at org.apache.pinot.tools.admin.PinotAdministrator.main(PinotAdministrator.java:198)
Caused by: java.lang.RuntimeException: Failed to generate Pinot segment for file - <s3://0x-wojciech/transactions-small.csv>
at org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner.lambda$submitSegmentGenTask$1(SegmentGenerationJobRunner.java:286)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: java.lang.IllegalArgumentException: Invalid partial or full segment name: 2022-08-20T00:00:07Z
at org.apache.pinot.segment.spi.creator.name.SegmentNameUtils.validatePartialOrFullSegmentName(SegmentNameUtils.java:40)
at org.apache.pinot.segment.spi.creator.name.SimpleSegmentNameGenerator.generateSegmentName(SimpleSegmentNameGenerator.java:63)
at org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl.handlePostCreation(SegmentIndexCreationDriverImpl.java:279)
at org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl.build(SegmentIndexCreationDriverImpl.java:269)
at org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationTaskRunner.run(SegmentGenerationTaskRunner.java:119)
at org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner.lambda$submitSegmentGenTask$1(SegmentGenerationJobRunner.java:264)
... 5 more
Wojciech Wasik
09/30/2022, 6:25 PMNeha Pawar
"transformConfigs": [
{
"columnName": "TimestampMillis",
"transformFunction": "fromDateTime(\"Timestamp\", 'yyyy-MM-dd''T''HH:mm:ss''Z')"
}
]
I also had to change Timestamp to TimestampMillis in the schema and in the table config's timeColumnName.
For the transform function, the input arguments must be fields that exist in the source record; the function's result is then collected into another (new) field.
Neha Pawar
Neha Pawar
Tim Santos
09/30/2022, 9:12 PM"segmentNameGenerator.type" : "normalizedDate"
Tim Santos
09/30/2022, 9:13 PMNeha Pawar
Wojciech Wasik
10/03/2022, 8:04 AMWojciech Wasik
10/03/2022, 8:05 AMSid
04/03/2023, 12:28 PMKartik Khare
04/03/2023, 12:35 PMSid
04/03/2023, 12:50 PM