17 changes: 17 additions & 0 deletions .zuul.yaml
@@ -0,0 +1,17 @@
- project:
    name: theopenlab/spark
    check:
      jobs:
        - spark-k8s-test

- job:
    name: spark-k8s-test
    parent: init-test
    description: |
      Run integration tests of Spark master against a v1.15.3 k8s cluster deployed by kubeadm
    run: .zuul/playbooks/spark-k8s-test/run.yaml
    nodeset: ubuntu-xenial-arm64
    timeout: 86400
    vars:
      k8s_log_dir: '{{ ansible_user_dir }}/workspace/logs/kubernetes'
      kubernetes_version: 1.15.3
108 changes: 108 additions & 0 deletions .zuul/playbooks/spark-k8s-test/run.yaml
@@ -0,0 +1,108 @@
- hosts: all
  become: yes
  roles:
    - install-openjdk
    - create-single-k8s-cluster-with-kubeadm
  tasks:
    - name: Preparing R env
      shell:
        cmd: |
          set -ex
          apt-get update

          # compile R 3.6.1
          mkdir /opt/installR
          cd /opt/installR
          wget https://cran.r-project.org/src/base/R-3/R-3.6.1.tar.gz
          tar -zxvf R-3.6.1.tar.gz

          # compile zlib for R
          apt-get install gfortran -y
          apt-get install build-essential -y
          apt-get install libreadline-dev -y
          apt-get install libxt-dev -y
          wget http://zlib.net/zlib-1.2.11.tar.gz
          tar xzvf zlib-1.2.11.tar.gz
          cd zlib-1.2.11
          ./configure
          make && make install
          cd ..

          # compile xz for R
          apt install libbz2-dev -y
          wget http://tukaani.org/xz/xz-5.2.2.tar.gz
          tar xzvf xz-5.2.2.tar.gz
          cd xz-5.2.2
          ./configure
          make && make install
          cd ..

          # compile pcre for R
          wget https://ftp.pcre.org/pub/pcre/pcre-8.41.tar.gz
          tar -zxvf pcre-8.41.tar.gz
          cd pcre-8.41
          ./configure --prefix=/opt/pcre-8.41 --enable-utf8
          make -j3 && make install
          cd ..

          # compile libcurl for R
          wget https://curl.haxx.se/download/curl-7.50.1.tar.gz
          tar zxvf curl-7.50.1.tar.gz
          cd curl-7.50.1/
          apt-get install openssl* -y
          apt-get install libcurl4-gnutls-dev -y
          apt-get install libssl-dev -y
          ./configure --prefix=/opt/curl-7.50.1
          make && make install
          cd ..

          # swap out the default curl, which doesn't support https in the VM image
          export PATH=$PATH:/opt/curl-7.50.1/bin
          mv /usr/bin/curl /usr/bin/curl_bk

          # start the R compile
          cd R-3.6.1
          apt-get install dbus libpng-dev libjpeg-dev libcairo-dev qpdf -y
          ./configure --with-cairo --with-x --enable-R-shlib LDFLAGS="-L/opt/pcre-8.41/lib -L/opt/curl-7.50.1/lib" CPPFLAGS="-I/opt/pcre-8.41/include -I/opt/curl-7.50.1/include"
          echo "/opt/pcre-8.41/lib" >> /etc/ld.so.conf
          echo "/opt/curl-7.50.1/lib" >> /etc/ld.so.conf
          ldconfig
          make && make install

          # Prepare the SparkR dependencies
          R -e "install.packages(c('knitr', 'rmarkdown', 'e1071', 'survival'), repos='https://cloud.r-project.org/')"
          apt install libxml2-dev -y
          R -e "install.packages(c('xml2', 'rversions', 'roxygen2'), repos='https://cloud.r-project.org/')"
          R -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
          R -e "install.packages(c('crayon', 'praise', 'R6'), repos='https://cloud.r-project.org/')"
          R -e "install.packages('https://cloud.r-project.org/src/contrib/Archive/testthat/testthat_1.0.2.tar.gz', repos=NULL, type='source')"
          R -e "packageVersion('knitr'); packageVersion('rmarkdown'); packageVersion('testthat'); packageVersion('e1071'); packageVersion('survival'); packageVersion('xml2'); packageVersion('rversions'); packageVersion('roxygen2'); packageVersion('devtools');"

          cd {{ ansible_user_dir }}/{{ zuul.project.src_dir }}
          ./R/install-dev.sh
        chdir: '{{ zuul.project.src_dir }}'
        executable: /bin/bash
      environment: '{{ global_env }}'
    - name: Run integration tests of Spark with k8s cluster manager
      shell: |
        set -ex

        sed -i -e '/127.0.0.1/ s/\(localhost\)/'$(hostname)' \1/' /etc/hosts

        # Create the required service account in k8s
        export KUBECONFIG=/etc/kubernetes/admin.conf
        kubectl create serviceaccount spark
        kubectl create clusterrolebinding spark-role --clusterrole=edit --serviceaccount=default:spark --namespace=default
        # NOTE: the distribution step may fail due to dependency download failures, so we retry it
        for i in $(seq 1 3); do ./dev/make-distribution.sh --tgz --r -Pkubernetes && s=0 && break || s=$? && sleep 5; done; (exit $s)
        pushd resource-managers/kubernetes/integration-tests
        dev/dev-run-integration-tests.sh --deploy-mode cloud \
          --spark-master k8s://$(kubectl config view -o jsonpath='{.clusters[0].cluster.server}') \
          --spark-tgz $(realpath ../../../spark-*.tgz) --namespace default --service-account spark \
          --exclude-tags minikube
        popd
      args:
        executable: /bin/bash
        chdir: '{{ zuul.project.src_dir }}'
      environment: '{{ global_env }}'
@@ -0,0 +1,62 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

FROM arm64v8/openjdk:8-alpine

ARG spark_uid=185

# Before building the docker image, first build and make a Spark distribution following
# the instructions in http://spark.apache.org/docs/latest/building-spark.html.
# If this docker file is being used in the context of building your images from a Spark
# distribution, the docker build command should be invoked from the top level directory
# of the Spark distribution. E.g.:
# docker build -t spark:latest -f kubernetes/dockerfiles/spark/Dockerfile .

RUN set -ex && \
    apk upgrade --no-cache && \
    ln -s /lib /lib64 && \
    apk add --no-cache bash tini libc6-compat linux-pam krb5 krb5-libs nss && \
    mkdir -p /opt/spark && \
    mkdir -p /opt/spark/examples && \
    mkdir -p /opt/spark/work-dir && \
    touch /opt/spark/RELEASE && \
    rm /bin/sh && \
    ln -sv /bin/bash /bin/sh && \
    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
    chgrp root /etc/passwd && chmod ug+rw /etc/passwd

#ENV TINI_VERSION v0.18.0
#ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-arm64 /usr/bin/tini
#RUN chmod +x /usr/bin/tini

COPY jars /opt/spark/jars
COPY bin /opt/spark/bin
COPY sbin /opt/spark/sbin
COPY kubernetes/dockerfiles/spark/entrypoint.sh /opt/
COPY examples /opt/spark/examples
COPY kubernetes/tests /opt/spark/tests
COPY data /opt/spark/data

ENV SPARK_HOME /opt/spark

WORKDIR /opt/spark/work-dir
RUN chmod g+w /opt/spark/work-dir

ENTRYPOINT [ "/opt/entrypoint.sh" ]

# Specify the User that the actual main process will run as
USER ${spark_uid}
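For reference, a minimal sketch of building this arm64 base image and an R-binding image by hand from the top of an unpacked Spark distribution. The image names are placeholders, and the aarch64 R-binding Dockerfile path is an assumption by analogy with the python binding path referenced in dev-run-integration-tests.sh:

  # Build the aarch64 base image (placeholder tag spark-arm64:latest)
  docker build -t spark-arm64:latest \
    -f kubernetes/dockerfiles/spark/aarch64/Dockerfile .
  # Build a binding image on top of it via the base_img build arg
  docker build -t spark-arm64-r:latest \
    --build-arg base_img=spark-arm64:latest \
    -f kubernetes/dockerfiles/spark/aarch64/bindings/R/Dockerfile .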
@@ -0,0 +1,38 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

ARG base_img

FROM $base_img
WORKDIR /

# Reset to root to run installation tasks
USER 0

RUN mkdir ${SPARK_HOME}/R

RUN apk add --no-cache R R-dev

COPY R ${SPARK_HOME}/R
ENV R_HOME /usr/lib/R

WORKDIR /opt/spark/work-dir
ENTRYPOINT [ "/opt/entrypoint.sh" ]

# Specify the User that the actual main process will run as
ARG spark_uid=185
USER ${spark_uid}
@@ -0,0 +1,48 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

ARG base_img

FROM $base_img
WORKDIR /

# Reset to root to run installation tasks
USER 0

RUN mkdir ${SPARK_HOME}/python
# TODO: Investigate running both pip and pip3 via virtualenvs
RUN apk add --no-cache python && \
    apk add --no-cache python3 && \
    python -m ensurepip && \
    python3 -m ensurepip && \
    # We remove ensurepip since it adds no functionality once pip is
    # installed on the image, and it just takes up 1.6MB on the image
    rm -r /usr/lib/python*/ensurepip && \
    pip install --upgrade pip setuptools && \
    # You may install python3 packages by using pip3.6
    # Remove the .cache to save space
    rm -rf /root/.cache

COPY python/pyspark ${SPARK_HOME}/python/pyspark
COPY python/lib ${SPARK_HOME}/python/lib

WORKDIR /opt/spark/work-dir
ENTRYPOINT [ "/opt/entrypoint.sh" ]

# Specify the User that the actual main process will run as
ARG spark_uid=185
USER ${spark_uid}
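As an illustrative sketch only (the master URL, repository, and tag below are placeholders, not values from this change), a PySpark job could be submitted against the cluster with an image built from this file:

  ./bin/spark-submit \
    --master k8s://https://<apiserver-host>:<port> \
    --deploy-mode cluster \
    --name pyspark-pi \
    --conf spark.executor.instances=2 \
    --conf spark.kubernetes.namespace=default \
    --conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
    --conf spark.kubernetes.container.image=<repo>/spark-py:<tag> \
    local:///opt/spark/examples/src/main/python/pi.py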
@@ -94,4 +94,8 @@ case "$1" in
esac

# Execute the container CMD under tini for better hygiene
exec /usr/bin/tini -s -- "${CMD[@]}"
if [[ $(uname -i) == "aarch64" ]]; then
  exec /sbin/tini -s -- "${CMD[@]}"
else
  exec /usr/bin/tini -s -- "${CMD[@]}"
fi
@@ -65,14 +65,22 @@ then
# If there is no spark image tag to test with and no src dir, build from current
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
SPARK_INPUT_DIR="$(cd "$SCRIPT_DIR/"../../../../ >/dev/null 2>&1 && pwd )"
DOCKER_FILE_BASE_PATH="$SPARK_INPUT_DIR/resource-managers/kubernetes/docker/src/main/dockerfiles/spark"
if [[ $(uname -i) == "aarch64" ]]; then
  DOCKER_FILE_BASE_PATH="$SPARK_INPUT_DIR/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/aarch64"
else
  DOCKER_FILE_BASE_PATH="$SPARK_INPUT_DIR/resource-managers/kubernetes/docker/src/main/dockerfiles/spark"
fi
elif [[ $IMAGE_TAG == "N/A" ]];
then
# If there is a test src tarball and no image tag we will want to build from that
mkdir -p $UNPACKED_SPARK_TGZ
tar -xzvf $SPARK_TGZ --strip-components=1 -C $UNPACKED_SPARK_TGZ;
SPARK_INPUT_DIR="$UNPACKED_SPARK_TGZ"
DOCKER_FILE_BASE_PATH="$SPARK_INPUT_DIR/kubernetes/dockerfiles/spark"
if [[ $(uname -i) == "aarch64" ]]; then
  DOCKER_FILE_BASE_PATH="$SPARK_INPUT_DIR/kubernetes/dockerfiles/spark/aarch64"
else
  DOCKER_FILE_BASE_PATH="$SPARK_INPUT_DIR/kubernetes/dockerfiles/spark"
fi
fi


@@ -82,6 +90,8 @@ then
IMAGE_TAG=$(uuidgen);
cd $SPARK_INPUT_DIR

BASE_IMAGE_BUILD_ARGS="-f $DOCKER_FILE_BASE_PATH/Dockerfile"

# Build PySpark image
LANGUAGE_BINDING_BUILD_ARGS="-p $DOCKER_FILE_BASE_PATH/bindings/python/Dockerfile"

@@ -95,7 +105,7 @@ then
case $DEPLOY_MODE in
cloud)
# Build images
$SPARK_INPUT_DIR/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build
$SPARK_INPUT_DIR/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS $BASE_IMAGE_BUILD_ARGS build

# Push images appropriately
if [[ $IMAGE_REPO == gcr.io* ]] ;
@@ -109,13 +119,13 @@ then
docker-for-desktop)
# Only need to build as this will place it in our local Docker repo which is all
# we need for Docker for Desktop to work so no need to also push
$SPARK_INPUT_DIR/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build
$SPARK_INPUT_DIR/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS $BASE_IMAGE_BUILD_ARGS build
;;

minikube)
# Only need to build and if we do this with the -m option for minikube we will
# build the images directly using the minikube Docker daemon so no need to push
$SPARK_INPUT_DIR/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build
$SPARK_INPUT_DIR/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS $BASE_IMAGE_BUILD_ARGS build
;;
*)
echo "Unrecognized deploy mode $DEPLOY_MODE" && exit 1
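Putting the new arguments together, a hedged sketch of the docker-image-tool.sh invocation the script now assembles for the aarch64 case when building from the repository checkout; the repository and tag values are placeholders (in the job they come from IMAGE_REPO and a generated uuid):

  # -f selects the base Dockerfile, -p the PySpark binding Dockerfile
  ./bin/docker-image-tool.sh -r docker.io/example -t dev \
    -f resource-managers/kubernetes/docker/src/main/dockerfiles/spark/aarch64/Dockerfile \
    -p resource-managers/kubernetes/docker/src/main/dockerfiles/spark/aarch64/bindings/python/Dockerfile \
    build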