Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions tez-dist/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,38 @@
</dependency>
</dependencies>
</profile>
<profile>
  <!--
    Builds the Tez AM Docker image by running src/docker/build-docker.sh in
    the package phase. The profile has no activation rule, so it only runs
    when selected explicitly (-Pdocker).

    The image repository defaults to "apache" and can be overridden from the
    command line, e.g. -Ddocker.repo=myorg.

    NOTE(review): the script exits with an error unless
    target/tez-${project.version}.tar.gz already exists; confirm that this
    execution runs after the step that produces that tarball.
  -->
  <id>docker</id>
  <properties>
    <docker.repo>apache</docker.repo>
  </properties>
  <build>
    <plugins>
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <executions>
          <execution>
            <id>build-docker-image</id>
            <phase>package</phase>
            <goals>
              <goal>exec</goal>
            </goals>
            <configuration>
              <!-- Resolved through PATH, like the script's own "env bash" shebang. -->
              <executable>bash</executable>
              <arguments>
                <argument>${project.basedir}/src/docker/build-docker.sh</argument>
                <argument>-hadoop</argument>
                <argument>${hadoop.version}</argument>
                <argument>-tez</argument>
                <argument>${project.version}</argument>
                <argument>-repo</argument>
                <argument>${docker.repo}</argument>
              </arguments>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</profile>
</profiles>

<build>
Expand Down
85 changes: 85 additions & 0 deletions tez-dist/src/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Name of the stage (or image) that the "env" stage is built FROM.
# The default, "unarchive", pulls the tarballs from the build context.
ARG BUILD_ENV=unarchive

# ONBUILD defers the COPY instructions: they run only when a later stage uses
# this stage as its base image.
# NOTE(review): "ubuntu" carries no tag, so it resolves to ubuntu:latest;
# consider pinning a release for reproducible builds.
FROM ubuntu AS unarchive
# The destinations end with "/" because a wildcard source may match more than
# one file, and COPY then requires a directory destination with a trailing slash.
ONBUILD COPY hadoop-*.tar.gz /opt/
# Matches snapshot tarballs as well, e.g. "tez-1.0.0-SNAPSHOT.tar.gz".
ONBUILD COPY tez-*.tar.gz /opt/

# Intermediate stage: unpacks the Hadoop and Tez tarballs found in /opt into
# /opt/hadoop and /opt/tez, then deletes the tarballs.
FROM ${BUILD_ENV} AS env
ARG HADOOP_VERSION
ARG TEZ_VERSION

# The Hadoop archive loses its top-level directory (strip-components) and is
# extracted without docs, jdiff, sources, test jars and webapps. The Tez
# archive is extracted as-is.
RUN hadoop_tgz=/opt/hadoop-$HADOOP_VERSION.tar.gz \
    && tez_tgz=/opt/tez-$TEZ_VERSION.tar.gz \
    && mkdir -p /opt/hadoop /opt/tez \
    && tar -xzv \
        --exclude="hadoop-$HADOOP_VERSION/share/doc" \
        --exclude="*/jdiff" \
        --exclude="*/sources" \
        --exclude="*tests.jar" \
        --exclude="*/webapps" \
        -f $hadoop_tgz \
        -C /opt/hadoop --strip-components 1 \
    && tar -xzv -f $tez_tgz -C /opt/tez \
    && rm -rf $hadoop_tgz $tez_tgz

# Final runtime image: JRE base plus the unpacked Hadoop and Tez trees.
FROM eclipse-temurin:21.0.3_9-jre-ubi9-minimal AS run

# Numeric id of the unprivileged "tez" user; override with --build-arg UID=<n>.
ARG UID=1000
ARG HADOOP_VERSION
ARG TEZ_VERSION

# Install runtime tools and create the "tez" user (no home directory, no login shell).
RUN set -ex; \
    microdnf update -y; \
    microdnf -y install procps gettext findutils; \
    microdnf clean all; \
    useradd --no-create-home -s /sbin/nologin -c "" --uid $UID tez

# Hadoop reads its configuration from the same directory as Tez.
ENV HADOOP_HOME=/opt/hadoop \
    TEZ_HOME=/opt/tez \
    TEZ_CONF_DIR=/opt/tez/conf \
    HADOOP_CONF_DIR=/opt/tez/conf

# Separate instruction: PATH refers to the variables defined just above.
ENV PATH=$TEZ_HOME/bin:$HADOOP_HOME/bin:$PATH

# Bring over the trees unpacked by the "env" stage.
COPY --from=env --chown=tez /opt/hadoop $HADOOP_HOME
COPY --from=env --chown=tez /opt/tez $TEZ_HOME

# Create the configuration directory up front so that it is owned by "tez".
RUN mkdir -p $TEZ_CONF_DIR && chown tez:tez $TEZ_CONF_DIR

COPY --chown=tez entrypoint.sh /
COPY --chown=tez conf $TEZ_CONF_DIR

# Create the plugin extension-point directory under TEZ_HOME and make the
# entrypoint executable (one layer instead of two).
RUN mkdir -p $TEZ_HOME/plugins \
    && chown tez:tez $TEZ_HOME/plugins \
    && chmod 755 $TEZ_HOME/plugins \
    && chmod +x /entrypoint.sh

USER tez
WORKDIR $TEZ_HOME

# 10001-10003 match tez.am.client.am.port-range in conf/tez-site.xml.
# NOTE(review): 8042 and 2181 are not part of that range (2181 is the port of
# the ZooKeeper quorum referenced in tez-site.xml); confirm they must be exposed.
EXPOSE 10001 10002 10003 8042 2181

ENTRYPOINT ["/entrypoint.sh"]
128 changes: 128 additions & 0 deletions tez-dist/src/docker/build-docker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Trace every command, abort on the first error, reject unset variables and
# make a pipeline fail when any of its commands fails.
set -o xtrace -o errexit -o nounset -o pipefail

# Filled in by the command-line options; an empty value selects the default.
HADOOP_VERSION=""
TEZ_VERSION=""
REPO=""

# Print the command-line help to stderr.
# The help flag is documented as -h, the spelling the option parser handles.
usage() {
  cat <<EOF 1>&2
Usage: $0 [-h] [-hadoop <Hadoop version>] [-tez <Tez version>] [-repo <Docker repo>]
Build the Apache Tez AM Docker image
  -h        Display help
  -hadoop   Build image with the specified Hadoop version
  -tez      Build image with the specified Tez version
  -repo     Docker repository
EOF
}

# Parse the command line.
#   -h | -help        print the help text and exit 0
#   -hadoop <version> sets HADOOP_VERSION
#   -tez <version>    sets TEZ_VERSION
#   -repo <name>      sets REPO
# An option given without its value is reported explicitly instead of dying
# with an "unbound variable" error under "set -u". Unrecognized arguments are
# still skipped, but a warning is printed so typos do not go unnoticed.
while [ $# -gt 0 ]; do
  case "$1" in
    -h | -help)
      usage
      exit 0
      ;;
    -hadoop | -tez | -repo)
      if [ $# -lt 2 ]; then
        echo "Error: option $1 requires a value" 1>&2
        usage
        exit 1
      fi
      case "$1" in
        -hadoop) HADOOP_VERSION=$2 ;;
        -tez) TEZ_VERSION=$2 ;;
        -repo) REPO=$2 ;;
      esac
      shift 2
      ;;
    *)
      echo "Warning: ignoring unrecognized argument: $1" 1>&2
      shift
      ;;
  esac
done

# Absolute path of the directory holding this script. "cd ... && pwd" makes a
# failed cd abort the script instead of silently yielding the caller's cwd.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)

# Both locations can be overridden from the environment.
DIST_DIR=${DIST_DIR:-"$SCRIPT_DIR/../.."}
PROJECT_ROOT=${PROJECT_ROOT:-"$SCRIPT_DIR/../../.."}

repo=${REPO:-apache}

# Scratch directory used as the Docker build context. The EXIT trap removes it
# on every exit path, including failures raised through "set -e"; "rm -rf"
# stays silent if the directory has already been deleted.
WORK_DIR="$(mktemp -d)"
trap 'rm -rf "$WORK_DIR"' EXIT

# Downloaded Hadoop tarballs are kept here between runs.
CACHE_DIR="$SCRIPT_DIR/cache"
mkdir -p "$CACHE_DIR"

# Defaults Hadoop and Tez versions from pom.xml if not provided
HADOOP_VERSION=${HADOOP_VERSION:-$(mvn -f "$PROJECT_ROOT/pom.xml" -q help:evaluate -Dexpression=hadoop.version -DforceStdout)}
TEZ_VERSION=${TEZ_VERSION:-$(mvn -f "$PROJECT_ROOT/pom.xml" -q help:evaluate -Dexpression=project.version -DforceStdout)}

# ---------------------------------------------------------------------------
# Hadoop tarball: reuse the cached copy, otherwise download it.
# HADOOP_URL may be preset in the environment to use another mirror.
# ---------------------------------------------------------------------------
HADOOP_FILE_NAME="hadoop-$HADOOP_VERSION.tar.gz"
HADOOP_URL=${HADOOP_URL:-"https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/$HADOOP_FILE_NAME"}
cached_hadoop="$CACHE_DIR/$HADOOP_FILE_NAME"
if [[ ! -f "$cached_hadoop" ]]; then
  echo "Downloading Hadoop from $HADOOP_URL..."
  # Download under a temporary name so that an interrupted transfer is never
  # mistaken for a complete cached tarball.
  curl --fail -L "$HADOOP_URL" -o "$cached_hadoop.tmp" || {
    echo "Fail to download Hadoop, exiting...."
    exit 1
  }
  mv "$cached_hadoop.tmp" "$cached_hadoop"
fi

# ---------------------------------------------------------------------------
# Tez tarball: taken from the local build output, never downloaded.
# ---------------------------------------------------------------------------
TEZ_FILE_NAME="tez-$TEZ_VERSION.tar.gz"
LOCAL_DIST_PATH="$DIST_DIR/target/$TEZ_FILE_NAME"

# Stop early when the project has not been built yet.
if [[ ! -f "$LOCAL_DIST_PATH" ]]; then
  echo "--> Error: Local Tez artifact not found at $LOCAL_DIST_PATH"
  echo "--> Please build the project first (e.g., mvn clean install -DskipTests)."
  exit 1
fi

echo "--> Found local Tez build artifact at: $LOCAL_DIST_PATH"
cp "$LOCAL_DIST_PATH" "$WORK_DIR/"

# ---------------------------------------------------------------------------
# Assemble the build context in WORK_DIR and build the image.
# ---------------------------------------------------------------------------
cp "$CACHE_DIR/$HADOOP_FILE_NAME" "$WORK_DIR/"

# The conf directory is optional: fall back to an empty one when the copy fails.
if ! cp -R "$SCRIPT_DIR/conf" "$WORK_DIR/" 2>/dev/null; then
  mkdir -p "$WORK_DIR/conf"
fi

for context_file in entrypoint.sh Dockerfile; do
  cp "$SCRIPT_DIR/$context_file" "$WORK_DIR/"
done

image_tag="$repo/tez-am:$TEZ_VERSION"
docker_args=(
  "$WORK_DIR"
  -f "$WORK_DIR/Dockerfile"
  -t "$image_tag"
  --build-arg "BUILD_ENV=unarchive"
  --build-arg "HADOOP_VERSION=$HADOOP_VERSION"
  --build-arg "TEZ_VERSION=$TEZ_VERSION"
)

echo "Building Docker image..."
docker build "${docker_args[@]}"

rm -r "${WORK_DIR}"
echo "Docker image $image_tag built successfully."
52 changes: 52 additions & 0 deletions tez-dist/src/docker/conf/tez-site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<!-- Default Tez configuration shipped in the Docker image. -->
<configuration>

  <!-- The same three ports (10001, 10002, 10003) are listed in the image's EXPOSE instruction. -->
  <property>
    <name>tez.am.client.am.port-range</name>
    <value>10001-10003</value>
  </property>

  <property>
    <name>tez.am.resource.memory.mb</name>
    <value>1024</value>
  </property>

  <!-- NOTE(review): presumably selects standalone operation coordinated through ZooKeeper; confirm. -->
  <property>
    <name>tez.framework.mode</name>
    <value>STANDALONE_ZOOKEEPER</value>
  </property>

  <!-- Points at a ZooKeeper reachable on the Docker host, port 2181. -->
  <property>
    <name>tez.am.zookeeper.quorum</name>
    <value>host.docker.internal:2181</value>
  </property>

  <!-- NOTE(review): DEBUG is verbose for a default; confirm it is intended for the published image. -->
  <property>
    <name>tez.am.log.level</name>
    <value>DEBUG</value>
  </property>

  <property>
    <name>tez.am.mode.session</name>
    <value>true</value>
  </property>

</configuration>
Loading