# Shared by George Leonard, 07/20/2024, 7:29 PM
# Base image: Flink 1.18.1 (Scala 2.12, Java 11), arm64v8 variant.
FROM arm64v8/flink:1.18.1-scala_2.12-java11

# Run shell-form instructions under bash. pipefail makes a pipeline fail the
# build when any stage of it fails, not only the last one.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Install some useful tools.
# update, install and cleanup share a single layer: files removed by a later
# RUN remain stored in the earlier layer, and a separately cached
# `apt-get update` layer can go stale.
# The former `apt-get purge ... $build_deps` step was dropped because
# $build_deps is never set in this Dockerfile.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        lnav \
        neovim \
        tree \
        unzip && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Install the DuckDB CLI (v1.0.0, linux-aarch64 build) into /usr/local/bin.
# The URL must be a plain (quoted) argument: wrapped in <...> the shell treats
# it as input/output redirections and wget never receives it.
# Download, unpack and delete happen in one layer so the zip is not kept in the image.
RUN wget -nv "https://github.com/duckdb/duckdb/releases/download/v1.0.0/duckdb_cli-linux-aarch64.zip" \
    && unzip duckdb_cli-linux-aarch64.zip -d /usr/local/bin \
    && rm duckdb_cli-linux-aarch64.zip
# From here on, instructions run as the flink user, relative to /opt/flink.
USER flink
WORKDIR /opt/flink

# Hive configuration, copied from the build context into Flink's conf directory.
COPY flink/conf/hive-site.xml ./conf/hive-site.xml

# Rewrite the job manager RPC address from localhost to flink-jobmanager so
# that the SQL Client, when run from this image, can find the job manager.
RUN sed --in-place \
        --expression='s/jobmanager.rpc.address: localhost/jobmanager.rpc.address: flink-jobmanager/g' \
        ./conf/flink-conf.yaml
# Install JARs
# See https://repo.maven.apache.org/maven2/org/apache/flink/
#
# curl flags used for every download:
#   -f   fail the build on an HTTP error instead of saving the error page as a .jar
#   -sS  no progress meter, but still print errors
#   -L   follow redirects (GitHub release assets are served through one)
#   -O   save under the remote file name
# URLs are passed as plain quoted arguments; wrapped in <...> the shell would
# parse them as redirections.
# Every RUN starts in WORKDIR, so the `cd` does not leak into later steps.
RUN echo "-> Install JARs: Flink's connector" && \
    mkdir -p ./lib && cd ./lib && \
    curl -fsSLO "https://repo.maven.apache.org/maven2/org/apache/flink/flink-sql-avro-confluent-registry/1.18.1/flink-sql-avro-confluent-registry-1.18.1.jar" && \
    curl -fsSLO "https://repo.maven.apache.org/maven2/org/apache/flink/flink-sql-avro/1.18.1/flink-sql-avro-1.18.1.jar" && \
    curl -fsSLO "https://repo.maven.apache.org/maven2/org/apache/flink/flink-sql-connector-kafka/3.2.0-1.18/flink-sql-connector-kafka-3.2.0-1.18.jar" && \
    curl -fsSLO "https://repo.maven.apache.org/maven2/org/apache/flink/flink-connector-jdbc/3.2.0-1.18/flink-connector-jdbc-3.2.0-1.18.jar" && \
    curl -fsSLO "https://repo.maven.apache.org/maven2/org/apache/flink/flink-connector-kafka/3.2.0-1.18/flink-connector-kafka-3.2.0-1.18.jar" && \
    curl -fsSLO "https://repo.maven.apache.org/maven2/org/apache/flink/flink-json/1.18.1/flink-json-1.18.1.jar" && \
    curl -fsSLO "https://github.com/knaufk/flink-faker/releases/download/v0.5.3/flink-faker-0.5.3.jar" && \
    curl -fsSLO "https://jdbc.postgresql.org/download/postgresql-42.5.4.jar"
# Database / search connectors, downloaded into ./lib.
# curl: -f fails the build on an HTTP error (instead of saving the error page
# as a .jar), -sS is quiet but still prints errors, -L follows redirects,
# -O keeps the remote file name. URLs must be plain quoted arguments; wrapped
# in <...> the shell would parse them as redirections.
# NOTE(review): postgresql-42.5.1.jar lands next to postgresql-42.5.4.jar, which
# an earlier step already downloads into ./lib — confirm both are wanted.
# NOTE(review): the Elasticsearch connector is a "-1.17" build and the Redis
# connector a "_2.11" build, while the base image is Flink 1.18.1 / Scala 2.12
# — confirm compatibility.
RUN echo "-> Install JARs: DB connectors" && \
    mkdir -p ./lib && cd ./lib && \
    curl -fsSLO "https://repo.maven.apache.org/maven2/org/apache/flink/flink-sql-connector-elasticsearch7/3.0.1-1.17/flink-sql-connector-elasticsearch7-3.0.1-1.17.jar" && \
    curl -fsSLO "https://repo.maven.apache.org/maven2/org/apache/flink/flink-connector-mongodb/1.2.0-1.18/flink-connector-mongodb-1.2.0-1.18.jar" && \
    curl -fsSLO "https://repo.maven.apache.org/maven2/org/apache/flink/flink-connector-redis_2.11/1.1.5/flink-connector-redis_2.11-1.1.5.jar" && \
    curl -fsSLO "https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.30/mysql-connector-java-8.0.30.jar" && \
    curl -fsSLO "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.5.1/postgresql-42.5.1.jar"
# Flink SQL Hive connector (Hive 3.1.3, Scala 2.12, Flink 1.18.1) into ./lib/hive.
# curl: -f fails the build on an HTTP error, -sS is quiet but prints errors,
# -L follows redirects, -O keeps the remote file name. The URL must be a plain
# quoted argument; wrapped in <...> the shell would parse it as redirections.
RUN echo "-> Install JARs: Flink's Hive connector" && \
    mkdir -p ./lib/hive && cd ./lib/hive && \
    curl -fsSLO "https://repo1.maven.org/maven2/org/apache/flink/flink-sql-connector-hive-3.1.3_2.12/1.18.1/flink-sql-connector-hive-3.1.3_2.12-1.18.1.jar"
# Iceberg Flink runtime (1.5.0, built for Flink 1.18) into ./lib/iceberg.
# curl: -f fails the build on an HTTP error, -sS is quiet but prints errors,
# -L follows redirects, -O keeps the remote file name. The URL must be a plain
# quoted argument; wrapped in <...> the shell would parse it as redirections.
RUN echo "-> Install JARs: Dependencies for Iceberg" && \
    mkdir -p ./lib/iceberg && cd ./lib/iceberg && \
    curl -fsSLO "https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-flink-runtime-1.18/1.5.0/iceberg-flink-runtime-1.18-1.5.0.jar"
# hadoop-aws 3.3.4 and the AWS Java SDK bundle 1.12.648 into ./lib/aws.
# curl: -f fails the build on an HTTP error, -sS is quiet but prints errors,
# -L follows redirects, -O keeps the remote file name. URLs must be plain
# quoted arguments; wrapped in <...> the shell would parse them as redirections.
RUN echo "-> Install JARs: AWS / Hadoop S3" && \
    mkdir -p ./lib/aws && cd ./lib/aws && \
    curl -fsSLO "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/3.3.4/hadoop-aws-3.3.4.jar" && \
    curl -fsSLO "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/1.12.648/aws-java-sdk-bundle-1.12.648.jar"
# Hadoop 3.3.4 client JARs plus the supporting libraries listed below, into ./lib/hadoop.
# curl: -f fails the build on an HTTP error (instead of saving the error page
# as a .jar), -sS is quiet but still prints errors, -L follows redirects,
# -O keeps the remote file name. URLs must be plain quoted arguments; wrapped
# in <...> the shell would parse them as redirections.
RUN echo "-> Install JARs: Hadoop" && \
    mkdir -p ./lib/hadoop && cd ./lib/hadoop && \
    curl -fsSLO "https://repo1.maven.org/maven2/org/apache/commons/commons-configuration2/2.1.1/commons-configuration2-2.1.1.jar" && \
    curl -fsSLO "https://repo1.maven.org/maven2/commons-logging/commons-logging/1.1.3/commons-logging-1.1.3.jar" && \
    curl -fsSLO "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-auth/3.3.4/hadoop-auth-3.3.4.jar" && \
    curl -fsSLO "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-common/3.3.4/hadoop-common-3.3.4.jar" && \
    curl -fsSLO "https://repo1.maven.org/maven2/org/apache/hadoop/thirdparty/hadoop-shaded-guava/1.1.1/hadoop-shaded-guava-1.1.1.jar" && \
    curl -fsSLO "https://repo1.maven.org/maven2/org/codehaus/woodstox/stax2-api/4.2.1/stax2-api-4.2.1.jar" && \
    curl -fsSLO "https://repo1.maven.org/maven2/com/fasterxml/woodstox/woodstox-core/5.3.0/woodstox-core-5.3.0.jar" && \
    curl -fsSLO "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-hdfs-client/3.3.4/hadoop-hdfs-client-3.3.4.jar" && \
    curl -fsSLO "https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-mapreduce-client-core/3.3.4/hadoop-mapreduce-client-core-3.3.4.jar"
# Default launch command: run start-cluster.sh and, if it succeeds, block on
# `sleep infinity` (presumably to keep the container running — confirm).
# Written in exec form with the same bash -c wrapper the SHELL instruction
# applies to a shell-form CMD.
CMD ["/bin/bash", "-c", "./bin/start-cluster.sh && sleep infinity"]