Skip to content

Commit b61411d

Browse files
committed
[SPARK-50328][INFRA] Add a separate docker file for SparkR
### What changes were proposed in this pull request?
Add a separate docker file for SparkR

### Why are the changes needed?
For env isolation

### Does this PR introduce _any_ user-facing change?
No, infra-only

### How was this patch tested?
CI

### Was this patch authored or co-authored using generative AI tooling?
No

Closes #48859 from zhengruifeng/infra_image_r.

Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
1 parent fa36e8b commit b61411d

3 files changed

Lines changed: 115 additions & 1 deletion

File tree

.github/workflows/build_and_test.yml

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ jobs:
6262
image_docs_url_link: ${{ steps.infra-image-link.outputs.image_docs_url_link }}
6363
image_lint_url: ${{ steps.infra-image-lint-outputs.outputs.image_lint_url }}
6464
image_lint_url_link: ${{ steps.infra-image-link.outputs.image_lint_url_link }}
65+
image_sparkr_url: ${{ steps.infra-image-sparkr-outputs.outputs.image_sparkr_url }}
66+
image_sparkr_url_link: ${{ steps.infra-image-link.outputs.image_sparkr_url_link }}
6567
steps:
6668
- name: Checkout Spark repository
6769
uses: actions/checkout@v4
@@ -154,6 +156,14 @@ jobs:
154156
IMG_NAME="apache-spark-ci-image-lint:${{ inputs.branch }}-${{ github.run_id }}"
155157
IMG_URL="ghcr.io/$REPO_OWNER/$IMG_NAME"
156158
echo "image_lint_url=$IMG_URL" >> $GITHUB_OUTPUT
159+
- name: Generate infra image URL (SparkR)
160+
id: infra-image-sparkr-outputs
161+
run: |
162+
# Convert to lowercase to meet Docker repo name requirement
163+
REPO_OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]')
164+
IMG_NAME="apache-spark-ci-image-sparkr:${{ inputs.branch }}-${{ github.run_id }}"
165+
IMG_URL="ghcr.io/$REPO_OWNER/$IMG_NAME"
166+
echo "image_sparkr_url=$IMG_URL" >> $GITHUB_OUTPUT
157167
- name: Link the docker images
158168
id: infra-image-link
159169
run: |
@@ -162,9 +172,11 @@ jobs:
162172
if [[ "${{ inputs.branch }}" == 'branch-3.5' ]]; then
163173
echo "image_docs_url_link=${{ steps.infra-image-outputs.outputs.image_url }}" >> $GITHUB_OUTPUT
164174
echo "image_lint_url_link=${{ steps.infra-image-outputs.outputs.image_url }}" >> $GITHUB_OUTPUT
175+
echo "image_sparkr_url_link=${{ steps.infra-image-outputs.outputs.image_url }}" >> $GITHUB_OUTPUT
165176
else
166177
echo "image_docs_url_link=${{ steps.infra-image-docs-outputs.outputs.image_docs_url }}" >> $GITHUB_OUTPUT
167178
echo "image_lint_url_link=${{ steps.infra-image-lint-outputs.outputs.image_lint_url }}" >> $GITHUB_OUTPUT
179+
echo "image_sparkr_url_link=${{ steps.infra-image-sparkr-outputs.outputs.image_sparkr_url }}" >> $GITHUB_OUTPUT
168180
fi
169181
170182
# Build: build Spark and run the tests for specified modules.
@@ -405,6 +417,17 @@ jobs:
405417
${{ needs.precondition.outputs.image_lint_url }}
406418
# Use the infra image cache to speed up
407419
cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-lint-cache:${{ inputs.branch }}
420+
- name: Build and push (SparkR)
421+
if: hashFiles('dev/spark-test-image/sparkr/Dockerfile') != ''
422+
id: docker_build_sparkr
423+
uses: docker/build-push-action@v6
424+
with:
425+
context: ./dev/spark-test-image/sparkr/
426+
push: true
427+
tags: |
428+
${{ needs.precondition.outputs.image_sparkr_url }}
429+
# Use the infra image cache to speed up
430+
cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-sparkr-cache:${{ inputs.branch }}
408431

409432

410433
pyspark:
@@ -564,7 +587,7 @@ jobs:
564587
runs-on: ubuntu-latest
565588
timeout-minutes: 180
566589
container:
567-
image: ${{ needs.precondition.outputs.image_url }}
590+
image: ${{ needs.precondition.outputs.image_sparkr_url_link }}
568591
env:
569592
HADOOP_PROFILE: ${{ inputs.hadoop }}
570593
HIVE_PROFILE: hive2.3

.github/workflows/build_infra_images_cache.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ on:
2929
- 'dev/infra/Dockerfile'
3030
- 'dev/spark-test-image/docs/Dockerfile'
3131
- 'dev/spark-test-image/lint/Dockerfile'
32+
- 'dev/spark-test-image/sparkr/Dockerfile'
3233
- '.github/workflows/build_infra_images_cache.yml'
3334
# Create infra image when cutting down branches/tags
3435
create:
@@ -88,3 +89,16 @@ jobs:
8889
- name: Image digest (Linter)
8990
if: hashFiles('dev/spark-test-image/lint/Dockerfile') != ''
9091
run: echo ${{ steps.docker_build_lint.outputs.digest }}
92+
- name: Build and push (SparkR)
93+
if: hashFiles('dev/spark-test-image/sparkr/Dockerfile') != ''
94+
id: docker_build_sparkr
95+
uses: docker/build-push-action@v6
96+
with:
97+
context: ./dev/spark-test-image/sparkr/
98+
push: true
99+
tags: ghcr.io/apache/spark/apache-spark-github-action-image-sparkr-cache:${{ github.ref_name }}-static
100+
cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-sparkr-cache:${{ github.ref_name }}
101+
cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-sparkr-cache:${{ github.ref_name }},mode=max
102+
- name: Image digest (SparkR)
103+
if: hashFiles('dev/spark-test-image/sparkr/Dockerfile') != ''
104+
run: echo ${{ steps.docker_build_sparkr.outputs.digest }}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
# Image for building and testing Spark branches. Based on Ubuntu 22.04.
19+
# See also https://hub.docker.com/_/ubuntu
20+
# The base image is pinned to a dated Ubuntu jammy tag rather than a floating tag.
FROM ubuntu:jammy-20240911.1
21+
LABEL org.opencontainers.image.authors="Apache Spark project <[email protected]>"
22+
LABEL org.opencontainers.image.licenses="Apache-2.0"
23+
LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image for SparkR"
24+
# Overwrite this label to avoid exposing the underlying Ubuntu OS version label
25+
LABEL org.opencontainers.image.version=""
26+
27+
# NOTE(review): presumably a cache-busting date that is bumped to force a full
# rebuild of the layers below — confirm against the project's image refresh process.
ENV FULL_REFRESH_DATE=20241114
28+
29+
# Make apt/debconf run without interactive prompts during the image build.
ENV DEBIAN_FRONTEND=noninteractive
30+
ENV DEBCONF_NONINTERACTIVE_SEEN=true
31+
32+
# Install the compiler toolchain, native development libraries, pandoc/qpdf and
# base R via apt, then delete the apt package lists in the same layer.
RUN apt-get update && apt-get install -y \
33+
build-essential \
34+
ca-certificates \
35+
curl \
36+
gfortran \
37+
git \
38+
gnupg \
39+
libcurl4-openssl-dev \
40+
libfontconfig1-dev \
41+
libfreetype6-dev \
42+
libfribidi-dev \
43+
libgit2-dev \
44+
libharfbuzz-dev \
45+
libjpeg-dev \
46+
liblapack-dev \
47+
libopenblas-dev \
48+
libpng-dev \
49+
libpython3-dev \
50+
libssl-dev \
51+
libtiff5-dev \
52+
libxml2-dev \
53+
pandoc \
54+
pkg-config \
55+
qpdf \
56+
r-base \
57+
software-properties-common \
58+
wget \
59+
zlib1g-dev \
60+
&& rm -rf /var/lib/apt/lists/*
61+
62+
# Register the CRAN apt repository (jammy-cran40) and import its signing key.
# NOTE(review): the same repository line is appended to sources.list here and
# added again by add-apt-repository below — confirm whether both are needed.
# NOTE(review): apt-key is deprecated on current Debian/Ubuntu releases — verify
# it is still available whenever the base image is upgraded.
RUN echo 'deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/' >> /etc/apt/sources.list
63+
RUN gpg --keyserver hkps://keyserver.ubuntu.com --recv-key E298A3A825C0D65DFD57CBB651716619E084DAB9
64+
RUN gpg -a --export E084DAB9 | apt-key add -
65+
RUN add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/'
66+
67+
# Install R packages from CRAN. roxygen2, lintr, pkgdown and preferably are
# pinned to fixed versions; roxygen2 must stay < 7.2.1, see SPARK-39959.
68+
RUN Rscript -e "install.packages(c('devtools', 'knitr', 'markdown', \
69+
'rmarkdown', 'testthat', 'e1071', 'survival', 'arrow', \
70+
'ggplot2', 'mvtnorm', 'statmod', 'xml2'), repos='https://cloud.r-project.org/')" && \
71+
Rscript -e "devtools::install_version('roxygen2', version='7.2.0', repos='https://cloud.r-project.org')" && \
72+
Rscript -e "devtools::install_version('lintr', version='2.0.1', repos='https://cloud.r-project.org')" && \
73+
Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" && \
74+
Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')"
75+
76+
# Put /usr/local/lib/R/site-library first on R's site library path, ahead of any
# existing R_LIBS_SITE entries and /usr/lib/R/library. See more in SPARK-39735.
77+
ENV R_LIBS_SITE="/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library"

0 commit comments

Comments
 (0)