Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
325 changes: 197 additions & 128 deletions .github/workflows/docker-build.yml

Large diffs are not rendered by default.

21 changes: 0 additions & 21 deletions .github/workflows/docker-publish.yml

This file was deleted.

2 changes: 1 addition & 1 deletion docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -91,5 +91,5 @@ echo "Show installed packages:"
docker run --rm -i pytorchignite/${image_name}:${image_tag} pip list

echo "Test pytorchignite/${image_name}:${image_tag}"
python test_image.py pytorchignite/${image_name}:${image_tag}
docker run --rm -i -v $PWD:/ws -w /ws -e HVD_VERSION=${HVD_VERSION:-} -e MSDP_VERSION=${MSDP_VERSION:-} pytorchignite/${image_name}:${image_tag} /bin/bash -c "python test_image.py pytorchignite/${image_name}:${image_tag}"
echo "OK"
4 changes: 2 additions & 2 deletions docker/docker.cfg
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[DEFAULT]
build_docker_image_pytorch_version = 2.0.0-cuda11.7-cudnn8
build_docker_image_hvd_version = v0.27.0
build_docker_image_pytorch_version = 2.1.0-cuda12.1-cudnn8
build_docker_image_hvd_version = v0.28.1
build_docker_image_msdp_version = v0.8.1
7 changes: 4 additions & 3 deletions docker/hvd/Dockerfile.hvd-apex
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ ARG PTH_VERSION
# 1/Building apex with pytorch:*-devel
FROM pytorch/pytorch:${PTH_VERSION}-devel AS apex-hvd-builder

ARG ARG_TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX 8.0 8.6"
ENV TORCH_CUDA_ARCH_LIST=$ARG_TORCH_CUDA_ARCH_LIST
ENV CUDA_HOME=/usr/local/cuda

# Install git
Expand All @@ -21,7 +19,7 @@ RUN echo "Setup NVIDIA Apex" && \
git clone https://github.com/NVIDIA/apex $tmp_apex_path && \
cd $tmp_apex_path && \
pip install packaging && \
pip wheel -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
pip wheel -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" .

ARG HVD_VERSION

Expand All @@ -30,6 +28,9 @@ RUN apt-get update && apt-get install -y git && \
git clone --recursive --depth 1 --branch ${HVD_VERSION} https://github.com/horovod/horovod.git /horovod && \
conda install -y cmake nccl -c conda-forge && \
cd /horovod && \
# temporary -std=c++17 fix
sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" CMakeLists.txt && \
sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" horovod/torch/CMakeLists.txt && \
HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_NCCL_LINK=SHARED HOROVOD_WITHOUT_MPI=1 HOROVOD_WITH_PYTORCH=1 pip wheel --no-cache-dir . && \
rm -rf /var/lib/apt/lists/*

Expand Down
3 changes: 2 additions & 1 deletion docker/hvd/Dockerfile.hvd-apex-nlp
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ FROM pytorchignite/hvd-apex:latest
# Ignite NLP dependencies
RUN pip install --upgrade --no-cache-dir transformers \
spacy \
nltk
nltk \
torchtext
9 changes: 0 additions & 9 deletions docker/hvd/Dockerfile.hvd-apex-vision
Original file line number Diff line number Diff line change
@@ -1,15 +1,6 @@
# Dockerfile.hvd-apex-vision
FROM pytorchignite/hvd-apex:latest

# Install opencv dependencies
RUN apt-get update && \
apt-get -y install --no-install-recommends libglib2.0 \
libsm6 \
libxext6 \
libxrender-dev \
libgl1-mesa-glx && \
rm -rf /var/lib/apt/lists/*

# Ignite vision dependencies
RUN pip install --upgrade --no-cache-dir albumentations \
image-dataset-viz \
Expand Down
3 changes: 3 additions & 0 deletions docker/hvd/Dockerfile.hvd-base
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ RUN apt-get update && apt-get install -y git && \
git clone --recursive --depth 1 --branch ${HVD_VERSION} https://github.com/horovod/horovod.git /horovod && \
conda install -y cmake nccl -c conda-forge && \
cd /horovod && \
# temporary -std=c++17 fix
sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" CMakeLists.txt && \
sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" horovod/torch/CMakeLists.txt && \
HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_NCCL_LINK=SHARED HOROVOD_WITHOUT_MPI=1 HOROVOD_WITH_PYTORCH=1 pip wheel --no-cache-dir . && \
rm -rf /var/lib/apt/lists/*

Expand Down
3 changes: 2 additions & 1 deletion docker/hvd/Dockerfile.hvd-nlp
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ FROM pytorchignite/hvd-base:latest
# Ignite NLP dependencies
RUN pip install --upgrade --no-cache-dir transformers \
spacy \
nltk
nltk \
torchtext
9 changes: 0 additions & 9 deletions docker/hvd/Dockerfile.hvd-vision
Original file line number Diff line number Diff line change
@@ -1,15 +1,6 @@
# Dockerfile.hvd-vision
FROM pytorchignite/hvd-base:latest

# Install opencv dependencies
RUN apt-get update && \
apt-get -y install --no-install-recommends libglib2.0 \
libsm6 \
libxext6 \
libxrender-dev \
libgl1-mesa-glx && \
rm -rf /var/lib/apt/lists/*

# Ignite vision dependencies
RUN pip install --upgrade --no-cache-dir albumentations \
image-dataset-viz \
Expand Down
4 changes: 1 addition & 3 deletions docker/main/Dockerfile.apex
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ ARG PTH_VERSION
# 1/Building apex with pytorch:*-devel
FROM pytorch/pytorch:${PTH_VERSION}-devel AS apex-builder

ARG ARG_TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX 8.0 8.6"
ENV TORCH_CUDA_ARCH_LIST=$ARG_TORCH_CUDA_ARCH_LIST
ENV CUDA_HOME=/usr/local/cuda

# Install git
Expand All @@ -21,7 +19,7 @@ RUN echo "Setup NVIDIA Apex" && \
git clone https://github.com/NVIDIA/apex $tmp_apex_path && \
cd $tmp_apex_path && \
pip install packaging && \
pip wheel -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
pip wheel -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" .
# 2/ Build the runtime image
FROM pytorch/pytorch:${PTH_VERSION}-runtime
Expand Down
3 changes: 2 additions & 1 deletion docker/main/Dockerfile.apex-nlp
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ FROM pytorchignite/apex:latest
# Ignite NLP dependencies
RUN pip install --upgrade --no-cache-dir transformers \
spacy \
nltk
nltk \
torchtext
9 changes: 0 additions & 9 deletions docker/main/Dockerfile.apex-vision
Original file line number Diff line number Diff line change
@@ -1,15 +1,6 @@
# Dockerfile.apex-vision
FROM pytorchignite/apex:latest

# Install opencv dependencies
RUN apt-get update && \
apt-get -y install --no-install-recommends libglib2.0 \
libsm6 \
libxext6 \
libxrender-dev \
libgl1-mesa-glx && \
rm -rf /var/lib/apt/lists/*

# Ignite vision dependencies
RUN pip install --upgrade --no-cache-dir albumentations \
image-dataset-viz \
Expand Down
3 changes: 2 additions & 1 deletion docker/main/Dockerfile.nlp
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ FROM pytorchignite/base:latest
# Ignite NLP dependencies
RUN pip install --upgrade --no-cache-dir transformers \
spacy \
nltk
nltk \
torchtext
9 changes: 0 additions & 9 deletions docker/main/Dockerfile.vision
Original file line number Diff line number Diff line change
@@ -1,15 +1,6 @@
# Dockerfile.vision
FROM pytorchignite/base:latest

# Install opencv dependencies
RUN apt-get update && \
apt-get -y install --no-install-recommends libglib2.0 \
libsm6 \
libxext6 \
libxrender-dev \
libgl1-mesa-glx && \
rm -rf /var/lib/apt/lists/*

# Ignite vision dependencies
RUN pip install --upgrade --no-cache-dir albumentations \
image-dataset-viz \
Expand Down
4 changes: 1 addition & 3 deletions docker/msdp/Dockerfile.msdp-apex
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ ARG PTH_VERSION
# 1/Building apex with pytorch:*-devel
FROM pytorch/pytorch:${PTH_VERSION}-devel AS apex-msdp-builder

ARG ARG_TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX 8.0 8.6"
ENV TORCH_CUDA_ARCH_LIST=$ARG_TORCH_CUDA_ARCH_LIST
ENV CUDA_HOME=/usr/local/cuda

# Install git
Expand All @@ -21,7 +19,7 @@ RUN echo "Setup NVIDIA Apex" && \
git clone https://github.com/NVIDIA/apex $tmp_apex_path && \
cd $tmp_apex_path && \
pip install packaging && \
pip wheel -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
pip wheel -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" .

# For pip --use-feature option
RUN python -m pip install --upgrade pip
Expand Down
3 changes: 2 additions & 1 deletion docker/msdp/Dockerfile.msdp-apex-nlp
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ FROM pytorchignite/msdp-apex:latest
# Ignite NLP dependencies
RUN pip install --upgrade --no-cache-dir transformers \
spacy \
nltk
nltk \
torchtext
9 changes: 0 additions & 9 deletions docker/msdp/Dockerfile.msdp-apex-vision
Original file line number Diff line number Diff line change
@@ -1,15 +1,6 @@
# Dockerfile.msdp-apex-vision
FROM pytorchignite/msdp-apex:latest

# Install opencv dependencies
RUN apt-get update && \
apt-get -y install --no-install-recommends libglib2.0 \
libsm6 \
libxext6 \
libxrender-dev \
libgl1-mesa-glx && \
rm -rf /var/lib/apt/lists/*

# Ignite vision dependencies
RUN pip install --upgrade --no-cache-dir albumentations \
image-dataset-viz \
Expand Down
64 changes: 42 additions & 22 deletions docker/push_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,46 +14,66 @@ if [ -z $DOCKER_TOKEN ]; then
exit 1
fi

# Image to push: the first CLI argument, or "all" when it is missing or empty.
push_selected_image="${1:-all}"

set -eu

echo $DOCKER_TOKEN | docker login --username=$DOCKER_USER --password-stdin

set -xeu

image_name="base"
image_tag=`docker run --rm -i pytorchignite/${image_name}:latest python -c "import torch; import ignite; print(torch.__version__ + \"-\" + ignite.__version__, end=\"\")"`

for image_name in "base" "vision" "nlp" "apex" "apex-vision" "apex-nlp"
do
# Push every image family when "all" was requested; the else-branch pushes only the selected image.
# Test push_selected_image (derived from $1 earlier in this script): folder_name is not assigned
# anywhere in the visible script, so expanding it aborts the run under `set -u`.
# Quoted operand and POSIX `=` keep the test valid for empty values and non-bash shells.
if [ "${push_selected_image}" = "all" ]; then

docker push pytorchignite/${image_name}:latest
docker push pytorchignite/${image_name}:${image_tag}
image_name="base"
image_tag=`docker run --rm -i pytorchignite/${image_name}:latest python -c "import torch; import ignite; print(torch.__version__ + \"-\" + ignite.__version__, end=\"\")"`

done
for image_name in "base" "vision" "nlp" "apex" "apex-vision" "apex-nlp"
do

docker push pytorchignite/${image_name}:latest
docker push pytorchignite/${image_name}:${image_tag}

image_name="hvd-base"
image_tag=`docker run --rm -i pytorchignite/${image_name}:latest python -c "import torch; import ignite; print(torch.__version__ + \"-\" + ignite.__version__, end=\"\")"`
done

for image_name in "hvd-base" "hvd-vision" "hvd-nlp" "hvd-apex" "hvd-apex-vision" "hvd-apex-nlp"
do
image_name="hvd-base"
image_tag=`docker run --rm -i pytorchignite/${image_name}:latest python -c "import torch; import ignite; print(torch.__version__ + \"-\" + ignite.__version__, end=\"\")"`

docker push pytorchignite/${image_name}:latest
docker push pytorchignite/${image_name}:${image_tag}
for image_name in "hvd-base" "hvd-vision" "hvd-nlp" "hvd-apex" "hvd-apex-vision" "hvd-apex-nlp"
do

docker push pytorchignite/${image_name}:latest
docker push pytorchignite/${image_name}:${image_tag}

done

done
# DEPRECATED due to no activity
# image_name="msdp-apex"
# image_tag=`docker run --rm -i pytorchignite/${image_name}:latest python -c "import torch; import ignite; print(torch.__version__ + \"-\" + ignite.__version__, end=\"\")"`

# DEPRECATED due to no activity
# image_name="msdp-apex"
# image_tag=`docker run --rm -i pytorchignite/${image_name}:latest python -c "import torch; import ignite; print(torch.__version__ + \"-\" + ignite.__version__, end=\"\")"`
# for image_name in "msdp-apex" "msdp-apex-vision" "msdp-apex-nlp"
# do

# for image_name in "msdp-apex" "msdp-apex-vision" "msdp-apex-nlp"
# do
# docker push pytorchignite/${image_name}:latest
# docker push pytorchignite/${image_name}:${image_tag}

# done

else

image_name=${push_selected_image}
image_tag=`docker run --rm -i pytorchignite/${image_name}:latest python -c "import torch; import ignite; print(torch.__version__ + \"-\" + ignite.__version__, end=\"\")"`

docker push pytorchignite/${image_name}:latest
docker push pytorchignite/${image_name}:${image_tag}

fi

# docker push pytorchignite/${image_name}:latest
# docker push pytorchignite/${image_name}:${image_tag}

# done

# If use locally, mind to clean dangling images
# docker images | grep 'pytorchignite\|<none>' | awk '{print $3}' | xargs docker rmi -f
Expand Down
Loading