@@ -2,6 +2,8 @@ ARG CUDA_VERSION=12.8.1
22FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
33
44ARG BUILD_TYPE=blackwell
5+ ARG DEEPEP_COMMIT=1b14ad661c7640137fcfe93cccb2694ede1220b0
6+ ARG CMAKE_BUILD_PARALLEL_LEVEL=2
57ENV DEBIAN_FRONTEND=noninteractive \
68 CUDA_HOME=/usr/local/cuda \
79 GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \
@@ -16,7 +18,7 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
1618 tzdata \
1719 software-properties-common netcat-openbsd kmod unzip openssh-server \
1820 curl wget lsof zsh ccache tmux htop git-lfs tree \
19- python3 python3-pip python3-dev libpython3-dev \
21+ python3 python3-pip python3-dev libpython3-dev python3-venv \
2022 build-essential cmake \
2123 libopenmpi-dev libnuma1 libnuma-dev \
2224 libibverbs-dev libibverbs1 libibumad3 \
@@ -36,13 +38,8 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
3638 && rm -rf /var/lib/apt/lists/* \
3739 && apt-get clean
3840
39-
40- # --- Install SGLang missing package
41- RUN pip install netifaces
42-
43- # --- Install nightly PyTorch ---
44- RUN pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128 --force-reinstall
45-
41+ # --- Install packages (openai, httpx) missing from SGLang's blackwell build type
42+ RUN python3 -m pip install --no-cache-dir openai httpx
4643
4744# GDRCopy installation
4845RUN mkdir -p /tmp/gdrcopy && cd /tmp \
@@ -56,12 +53,12 @@ RUN mkdir -p /tmp/gdrcopy && cd /tmp \
5653RUN libdir="/usr/lib/$(uname -m)-linux-gnu" && ln -sf "${libdir}/libmlx5.so.1" "${libdir}/libmlx5.so"
5754
5855# Clone and install SGLang
59- # FIXME: Forcing SGLang to 2a2d3478afe8cdb336888f2e6faa3775ac40254e because sgl-kernel v0.2.5 is missing aarch64 package
56+ # NOTE: flashinfer v0.2.9rc1 is not installing for aarch64
6057WORKDIR /sgl-workspace
6158RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5lib six \
6259 && git clone https://github.com/sgl-project/sglang.git \
6360 && cd sglang \
64- && git checkout 2a2d3478afe8cdb336888f2e6faa3775ac40254e \
61+ && git checkout a167fd0bcb9ef4b0f4331a109e40c8cdc770b026 \
6562 && case "$CUDA_VERSION" in \
6663 12.6.1) CUINDEX=126 ;; \
6764 12.8.1) CUINDEX=128 ;; \
@@ -70,38 +67,33 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5li
7067 && python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
7168 && if [ "$CUDA_VERSION" = "12.8.1" ]; then \
7269 python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.6 --force-reinstall --no-deps ; \
73- python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.2.4/sgl_kernel-0.2.4+cu128-cp39-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps ; \
70+ python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.2.7/sgl_kernel-0.2.7+cu128-cp39-abi3-manylinux2014_$(uname -m).whl --force-reinstall --no-deps ; \
7471 fi
7572
76-
77- # Build NVSHMEM
78- # Build and install NVSHMEM + DeepEP
79- RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.2.5/source/nvshmem_src_3.2.5-1.txz \
80- && git clone https://github.com/fzyzcjy/DeepEP.git \
81- && cd DeepEP \
82- && git checkout 1b14ad661c7640137fcfe93cccb2694ede1220b0 \
83- && cd .. \
84- && tar -xf nvshmem_src_3.2.5-1.txz && mv nvshmem_src nvshmem \
85- && cd nvshmem \
86- && git apply /sgl-workspace/DeepEP/third-party/nvshmem.patch \
87- && sed -i '1i#include <unistd.h>' examples/moe_shuffle.cu \
88- && rm -f /sgl-workspace/nvshmem_src_3.2.5-1.txz \
89- && NVSHMEM_SHMEM_SUPPORT=0 \
90- NVSHMEM_UCX_SUPPORT=0 \
91- NVSHMEM_USE_NCCL=0 \
92- NVSHMEM_MPI_SUPPORT=0 \
93- NVSHMEM_IBGDA_SUPPORT=1 \
94- NVSHMEM_PMIX_SUPPORT=0 \
95- NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
96- NVSHMEM_USE_GDRCOPY=1 \
97- cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES="100;120" \
98- && cmake --build build --target install -j \
99- && cd /sgl-workspace/DeepEP \
100- && NVSHMEM_DIR=${NVSHMEM_DIR} pip install .
73+ # Build NVSHMEM 3.3.9 from source (IBGDA + GDRCopy enabled), then install DeepEP at DEEPEP_COMMIT against it
74+ RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
75+ && git clone https://github.com/fzyzcjy/DeepEP.git \
76+ && cd DeepEP && git checkout "${DEEPEP_COMMIT}" && cd .. \
77+ && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \
78+ && cd nvshmem \
79+ && rm -f /sgl-workspace/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
80+ && NVSHMEM_SHMEM_SUPPORT=0 \
81+ NVSHMEM_UCX_SUPPORT=0 \
82+ NVSHMEM_USE_NCCL=0 \
83+ NVSHMEM_MPI_SUPPORT=0 \
84+ NVSHMEM_IBGDA_SUPPORT=1 \
85+ NVSHMEM_PMIX_SUPPORT=0 \
86+ NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
87+ NVSHMEM_USE_GDRCOPY=1 \
88+ cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES="100;120" \
89+ && cmake --build build --target install -j${CMAKE_BUILD_PARALLEL_LEVEL} \
90+ && cd /sgl-workspace/DeepEP \
91+ && NVSHMEM_DIR=${NVSHMEM_DIR} python3 -m pip install --no-cache-dir .
10192
10293# Python tools
10394RUN python3 -m pip install --no-cache-dir \
10495 datamodel_code_generator \
96+ mooncake_transfer_engine==0.3.5 \
10597 pre-commit \
10698 pytest \
10799 black \
@@ -145,9 +137,6 @@ RUN apt update -y \
145137 && apt update -y \
146138 && apt install nsight-systems-cli -y
147139
148- # --- Install Mooncake ---
149- RUN pip install mooncake-transfer-engine==0.3.5
150-
151140# Set up locale: generate en_US.UTF-8 and make it the default LANG
152141RUN locale-gen en_US.UTF-8
153142ENV LANG=en_US.UTF-8
0 commit comments