Slackbot
06/06/2023, 7:35 AMJohn Kowtko
06/06/2023, 10:36 AMBasayya Swami
06/06/2023, 10:40 AM"timestampSpec": {
"column": "cal_wk_start_dt",
"format": "iso"
}
so cal_wk_start_dt is replaced with __time, but the values in __time are different from the source data coming in for cal_wk_start_dt
Basayya Swami
06/06/2023, 10:41 AMJohn Kowtko
06/06/2023, 10:43 AMBasayya Swami
06/06/2023, 10:45 AMJohn Kowtko
06/06/2023, 10:48 AMBasayya Swami
06/06/2023, 11:12 AM{
"type": "index_hadoop",
"spec": {
"dataSchema": {
"dataSource": "datasource_test",
"timestampSpec": null,
"dimensionsSpec": null,
"metricsSpec": [],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "WEEK",
"queryGranularity": "WEEK",
"rollup": true,
"intervals": [
"#dataInterval#"
]
},
"transformSpec": {
"filter": null,
"transforms": []
},
"parser": {
"type": "parquet",
"parseSpec": {
"format": "parquet",
"columns": [
"start_dt",
"col1",
"col2",
"col3",
"col4",
"col5"
],
"timestampSpec": {
"column": "start_dt",
"format": "iso"
},
"dimensionsSpec": {
"dimensions": [
{
"type": "long",
"name": "start_dt"
},
{
"type": "string",
"name": "col1"
},
{
"type": "string",
"name": "col2"
},
{
"type": "string",
"name": "col3"
},
{
"type": "string",
"name": "col4"
},
{
"type": "string",
"name": "col5"
}
],
"dimensionExclusions": []
}
}
}
},
"ioConfig": {
"type": "hadoop",
"inputSpec": {
"type": "granularity",
"dataGranularity": "week",
"filePattern": ".*",
"inputFormat": "org.apache.druid.data.input.parquet.DruidParquetInputFormat",
"pathFormat": "'wk_nbr='yyyyww/",
"inputPath": "gs://gcslocation/gcstable/"
},
"metadataUpdateSpec": null,
"segmentOutputPath": null
},
"tuningConfig": {
"type": "hadoop",
"workingPath": null,
"partitionsSpec": {
"type": "hashed",
"numShards": 5,
"partitionDimensions": [],
"partitionFunction": "murmur3_32_abs",
"maxRowsPerSegment": null
},
"shardSpecs": {},
"indexSpec": {
"bitmap": {
"type": "concise"
},
"dimensionCompression": "lz4",
"metricCompression": "lz4",
"longEncoding": "longs",
"segmentLoader": null
},
"indexSpecForIntermediatePersists": {
"bitmap": {
"type": "concise"
},
"dimensionCompression": "lz4",
"metricCompression": "lz4",
"longEncoding": "longs",
"segmentLoader": null
},
"appendableIndexSpec": {
"type": "onheap"
},
"maxRowsInMemory": 1000000,
"maxBytesInMemory": 0,
"leaveIntermediate": false,
"cleanupOnFailure": true,
"overwriteFiles": false,
"ignoreInvalidRows": false,
"jobProperties": {
"mapreduce.job.classloader": "true",
"mapreduce.job.user.classpath.first": "true",
"mapreduce.input.fileinputformat.list-status.num-threads": "8",
"mapreduce.map.memory.mb": "5461",
"mapreduce.reduce.memory.mb": "5461",
"mapreduce.map.output.compress": "true",
"mapreduce.map.java.opts": "-Xmx4096m",
"mapreduce.reduce.java.opts": "-Xmx4096m",
"mapreduce.job.split.metainfo.maxsize": "-1",
"mapreduce.task.io.sort.mb": "2047",
"mapred.job.reuse.jvm.num.tasks": "20",
"io.seqfile.sorter.recordlimit": "10000000",
"mapred.output.compress": "true",
"mapreduce.job.reduce.slowstart.completedmaps": "0.5",
"mapreduce.reduce.shuffle.merge.percent": "0.8"
},
"combineText": false,
"useCombiner": false,
"buildV9Directly": true,
"numBackgroundPersistThreads": 0,
"forceExtendableShardSpecs": false,
"useExplicitVersion": false,
"allowedHadoopPrefix": [],
"logParseExceptions": false,
"maxParseExceptions": 0,
"useYarnRMJobStatusFallback": true
}
},
"hadoopDependencyCoordinates": null,
"classpathPrefix": null,
"context": {
"forceTimeChunkLock": true,
"useLineageBasedSegmentAllocation": true
}
}
John Kowtko
06/06/2023, 11:30 AMJohn Kowtko
06/06/2023, 11:32 AMBasayya Swami
06/06/2023, 1:31 PMBasayya Swami
06/06/2023, 1:33 PMJohn Kowtko
06/06/2023, 2:08 PMselect TIMESTAMPADD(DAY, -extract(dow from CURRENT_TIMESTAMP), CURRENT_TIMESTAMP)
For native ingestion the functions are timestamp_shift() and timestamp_extract() ... I can't get the ingestion expression to work against my demo dataset ... maybe you can get it working.
Abhishek Balaji Radhakrishnan
06/06/2023, 2:54 PMWEEK
granularity can be tricky since it doesn’t always align well with months or years. Consider using DAY
or MONTH
instead - see a recent change that advises against it: https://github.com/apache/druid/pull/14341/files?short_path=2b1d633#diff-2b1d6334204fbf5b1a3bbafb48a34b341caf65e368934447c516f15173226569Abhishek Balaji Radhakrishnan
06/06/2023, 2:56 PMWEEK
granularity, I think you could also do something like TIME_FLOOR(__time, 'P1W')
, similar to John’s suggestion above.
Basayya Swami
06/06/2023, 2:58 PMJohn Kowtko
06/06/2023, 3:42 PMBasayya Swami
06/06/2023, 4:00 PM