Skip to content

Commit f179a43

Browse files
authored
[CI] Fixed docker build GHA (#3101)
* [CI] Fixed docker build GHA
* Fixed HVD build, vision image issue and apex build issue
1 parent f3124c9 commit f179a43

20 files changed

Lines changed: 278 additions & 307 deletions

.github/workflows/docker-build.yml

Lines changed: 197 additions & 128 deletions
Large diffs are not rendered by default.

.github/workflows/docker-publish.yml

Lines changed: 0 additions & 21 deletions
This file was deleted.

docker/build.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -91,5 +91,5 @@ echo "Show installed packages:"
9191
docker run --rm -i pytorchignite/${image_name}:${image_tag} pip list
9292

9393
echo "Test pytorchignite/${image_name}:${image_tag}"
94-
python test_image.py pytorchignite/${image_name}:${image_tag}
94+
docker run --rm -i -v $PWD:/ws -w /ws -e HVD_VERSION=${HVD_VERSION:-} -e MSDP_VERSION=${MSDP_VERSION:-} pytorchignite/${image_name}:${image_tag} /bin/bash -c "python test_image.py pytorchignite/${image_name}:${image_tag}"
9595
echo "OK"

docker/docker.cfg

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
[DEFAULT]
2-
build_docker_image_pytorch_version = 2.0.0-cuda11.7-cudnn8
3-
build_docker_image_hvd_version = v0.27.0
2+
build_docker_image_pytorch_version = 2.1.0-cuda12.1-cudnn8
3+
build_docker_image_hvd_version = v0.28.1
44
build_docker_image_msdp_version = v0.8.1

docker/hvd/Dockerfile.hvd-apex

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,6 @@ ARG PTH_VERSION
66
# 1/Building apex with pytorch:*-devel
77
FROM pytorch/pytorch:${PTH_VERSION}-devel AS apex-hvd-builder
88

9-
ARG ARG_TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX 8.0 8.6"
10-
ENV TORCH_CUDA_ARCH_LIST=$ARG_TORCH_CUDA_ARCH_LIST
119
ENV CUDA_HOME=/usr/local/cuda
1210

1311
# Install git
@@ -21,7 +19,7 @@ RUN echo "Setup NVIDIA Apex" && \
2119
git clone https://github.com/NVIDIA/apex $tmp_apex_path && \
2220
cd $tmp_apex_path && \
2321
pip install packaging && \
24-
pip wheel -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
22+
pip wheel -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" .
2523

2624
ARG HVD_VERSION
2725

@@ -30,6 +28,9 @@ RUN apt-get update && apt-get install -y git && \
3028
git clone --recursive --depth 1 --branch ${HVD_VERSION} https://github.com/horovod/horovod.git /horovod && \
3129
conda install -y cmake nccl -c conda-forge && \
3230
cd /horovod && \
31+
# temporary -std=c++17 fix
32+
sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" CMakeLists.txt && \
33+
sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" horovod/torch/CMakeLists.txt && \
3334
HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_NCCL_LINK=SHARED HOROVOD_WITHOUT_MPI=1 HOROVOD_WITH_PYTORCH=1 pip wheel --no-cache-dir . && \
3435
rm -rf /var/lib/apt/lists/*
3536

docker/hvd/Dockerfile.hvd-apex-nlp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,4 +4,5 @@ FROM pytorchignite/hvd-apex:latest
44
# Ignite NLP dependencies
55
RUN pip install --upgrade --no-cache-dir transformers \
66
spacy \
7-
nltk
7+
nltk \
8+
torchtext

docker/hvd/Dockerfile.hvd-apex-vision

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,6 @@
11
# Dockerfile.hvd-apex-vision
22
FROM pytorchignite/hvd-apex:latest
33

4-
# Install opencv dependencies
5-
RUN apt-get update && \
6-
apt-get -y install --no-install-recommends libglib2.0 \
7-
libsm6 \
8-
libxext6 \
9-
libxrender-dev \
10-
libgl1-mesa-glx && \
11-
rm -rf /var/lib/apt/lists/*
12-
134
# Ignite vision dependencies
145
RUN pip install --upgrade --no-cache-dir albumentations \
156
image-dataset-viz \

docker/hvd/Dockerfile.hvd-base

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,9 @@ RUN apt-get update && apt-get install -y git && \
1212
git clone --recursive --depth 1 --branch ${HVD_VERSION} https://github.com/horovod/horovod.git /horovod && \
1313
conda install -y cmake nccl -c conda-forge && \
1414
cd /horovod && \
15+
# temporary -std=c++17 fix
16+
sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" CMakeLists.txt && \
17+
sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" horovod/torch/CMakeLists.txt && \
1518
HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_NCCL_LINK=SHARED HOROVOD_WITHOUT_MPI=1 HOROVOD_WITH_PYTORCH=1 pip wheel --no-cache-dir . && \
1619
rm -rf /var/lib/apt/lists/*
1720

docker/hvd/Dockerfile.hvd-nlp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,4 +4,5 @@ FROM pytorchignite/hvd-base:latest
44
# Ignite NLP dependencies
55
RUN pip install --upgrade --no-cache-dir transformers \
66
spacy \
7-
nltk
7+
nltk \
8+
torchtext

docker/hvd/Dockerfile.hvd-vision

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,6 @@
11
# Dockerfile.hvd-vision
22
FROM pytorchignite/hvd-base:latest
33

4-
# Install opencv dependencies
5-
RUN apt-get update && \
6-
apt-get -y install --no-install-recommends libglib2.0 \
7-
libsm6 \
8-
libxext6 \
9-
libxrender-dev \
10-
libgl1-mesa-glx && \
11-
rm -rf /var/lib/apt/lists/*
12-
134
# Ignite vision dependencies
145
RUN pip install --upgrade --no-cache-dir albumentations \
156
image-dataset-viz \

0 commit comments

Comments
 (0)