somanath joglekar
08/20/2024, 8:09 PMKen Krugler
08/20/2024, 11:14 PMm5d.2xlarge
instance, which has an SSD.Xiang Fu
Xiang Fu
// Resolve the optional staging directory for temporary output Pinot segments.
// The value is read from the execution framework spec's extra configs under the STAGING_DIR key;
// when the key is absent, stagingDirURI stays null and nothing below runs.
String stagingDir = _spec.getExecutionFrameworkSpec().getExtraConfigs().get(STAGING_DIR);
URI stagingDirURI = null;
if (stagingDir != null) {
stagingDirURI = URI.create(stagingDir);
// A value without a scheme (e.g. a bare path) is re-interpreted as a local file path,
// which yields a "file" URI.
if (stagingDirURI.getScheme() == null) {
stagingDirURI = new File(stagingDir).toURI();
}
// The staging and output directories must use the same URI scheme; a mismatch is rejected
// with a RuntimeException before any directory is created.
// NOTE(review): this dereferences outputDirURI.getScheme() — assumes outputDirURI was already
// given a non-null scheme earlier in the method; confirm against the surrounding code.
if (!outputDirURI.getScheme().equals(stagingDirURI.getScheme())) {
throw new RuntimeException(String
.format("The scheme of staging directory URI [%s] and output directory URI [%s] has to be same.",
stagingDirURI, outputDirURI));
}
// The staging directory is created through outputDirFS (presumably the filesystem handle
// for the output directory — verify where it is initialized).
outputDirFS.mkdir(stagingDirURI);
}
Xiang Fu
# executionFrameworkSpec: Defines the ingestion jobs to be run.
executionFrameworkSpec:
# name: execution framework name
name: 'spark'
# Class to use for segment generation and different push types.
segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.spark.SparkSegmentGenerationJobRunner'
segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.spark.SparkSegmentTarPushJobRunner'
segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.spark.SparkSegmentUriPushJobRunner'
segmentMetadataPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.spark.SparkSegmentMetadataPushJobRunner'
# extraConfigs: extra configs for execution framework.
extraConfigs:
# stagingDir is used in a distributed filesystem to host all the segments; this directory is then moved in its entirety to the output directory.
stagingDir: examples/batch/airlineStats/staging
Ken Krugler
08/21/2024, 7:06 PMXiang Fu