Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 0 additions & 36 deletions .github/workflows/release-docker-blackwell.yml

This file was deleted.

47 changes: 0 additions & 47 deletions .github/workflows/release-docker-deepep.yml

This file was deleted.

19 changes: 15 additions & 4 deletions .github/workflows/release-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,13 @@ jobs:
environment: 'prod'
strategy:
matrix:
cuda_version: ['12.4.1']
build_type: ['all']
cuda_version: ['12.6.1', '12.8.1']
build_type: ['all', 'blackwell']
exclude:
- cuda_version: '12.6.1'
build_type: 'blackwell'
- cuda_version: '12.8.1'
build_type: 'all'
steps:
- name: Delete huge unnecessary tools folder
run: rm -rf /opt/hostedtoolcache
Expand All @@ -41,6 +46,10 @@ jobs:
cuda_tag="cu124"
elif [ "${{ matrix.cuda_version }}" = "12.5.1" ]; then
cuda_tag="cu125"
elif [ "${{ matrix.cuda_version }}" = "12.6.1" ]; then
cuda_tag="cu126"
elif [ "${{ matrix.cuda_version }}" = "12.8.1" ]; then
cuda_tag="cu128"
else
echo "Unsupported CUDA version"
exit 1
Expand All @@ -52,15 +61,17 @@ jobs:
tag_suffix=""
elif [ "${{ matrix.build_type }}" = "srt" ]; then
tag_suffix="-srt"
elif [ "${{ matrix.build_type }}" = "blackwell" ]; then
tag_suffix="-b200"
else
echo "Unsupported build type"
exit 1
fi

docker build . -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.cuda_version }} --build-arg BUILD_TYPE=${{ matrix.build_type }} -t lmsysorg/sglang:${tag}${tag_suffix} --no-cache
docker buildx build --output type=image,compression=zstd . -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.cuda_version }} --build-arg BUILD_TYPE=${{ matrix.build_type }} -t lmsysorg/sglang:${tag}${tag_suffix} --no-cache
docker push lmsysorg/sglang:${tag}${tag_suffix}

if [ "${{ matrix.cuda_version }}" = "12.4.1" ]; then
if [ "${{ matrix.cuda_version }}" = "12.6.1" ]; then
docker tag lmsysorg/sglang:${tag}${tag_suffix} lmsysorg/sglang:latest${tag_suffix}
docker push lmsysorg/sglang:latest${tag_suffix}
fi
131 changes: 89 additions & 42 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,51 +1,98 @@
ARG CUDA_VERSION=12.4.1

FROM nvcr.io/nvidia/tritonserver:24.12-py3-min
ARG CUDA_VERSION=12.6.1
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04

ARG BUILD_TYPE=all
ENV DEBIAN_FRONTEND=noninteractive
ENV DEBIAN_FRONTEND=noninteractive \
CUDA_HOME=/usr/local/cuda \
GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \
NVSHMEM_DIR=/sgl-workspace/nvshmem/install

# Set timezone and install all packages
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
&& echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
&& apt update -y \
&& apt install software-properties-common -y \
&& apt install python3 python3-pip -y \
&& apt install curl git sudo libibverbs-dev -y \
&& apt install rdma-core infiniband-diags openssh-server perftest -y \
&& python3 --version \
&& python3 -m pip --version \
&& rm -rf /var/lib/apt/lists/* \
&& apt clean

# For openbmb/MiniCPM models
RUN pip3 install datamodel_code_generator --break-system-packages
&& echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
&& apt-get update && apt-get install -y --no-install-recommends \
tzdata \
software-properties-common netcat-openbsd kmod unzip openssh-server \
curl wget lsof zsh ccache tmux htop git-lfs tree \
python3 python3-pip python3-dev libpython3-dev \
build-essential cmake \
libopenmpi-dev libnuma1 libnuma-dev \
libibverbs-dev libibverbs1 libibumad3 \
librdmacm1 libnl-3-200 libnl-route-3-200 libnl-route-3-dev libnl-3-dev \
ibverbs-providers infiniband-diags perftest \
libgoogle-glog-dev libgtest-dev libjsoncpp-dev libunwind-dev \
libboost-all-dev libssl-dev \
libgrpc-dev libgrpc++-dev libprotobuf-dev protobuf-compiler-grpc \
pybind11-dev \
libhiredis-dev libcurl4-openssl-dev \
libczmq4 libczmq-dev \
libfabric-dev \
patchelf \
nvidia-dkms-550 \
devscripts debhelper fakeroot dkms check libsubunit0 libsubunit-dev \
&& ln -sf /usr/bin/python3 /usr/bin/python \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean

# GDRCopy v2.4.4: build the Debian packages from source and install them.
# The checkout lives under /tmp and is deleted again inside this same RUN,
# so it does not remain in the resulting layer.
RUN mkdir -p /tmp/gdrcopy \
    && git clone --branch v2.4.4 https://github.com/NVIDIA/gdrcopy.git /tmp/gdrcopy \
    && cd /tmp/gdrcopy/packages \
    && CUDA=/usr/local/cuda ./build-deb-packages.sh \
    && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
    && cd / \
    && rm -rf /tmp/gdrcopy

# Fix DeepEP IBGDA symlink: make the unversioned name libmlx5.so point at the
# versioned libmlx5.so.1 in /usr/lib/x86_64-linux-gnu (-f replaces any
# existing entry of that name).
# NOTE(review): presumably required so libmlx5 can be located by its
# unversioned name when DeepEP/NVSHMEM is built or loaded - confirm.
RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so

# Clone and install SGLang
WORKDIR /sgl-workspace

# Re-declare the build argument inside the stage. An ARG placed before FROM is
# only usable in FROM lines; without this declaration the RUN below would see
# $CUDA_VERSION only if the base image itself exports a variable of that name.
# Declared without a value so it inherits the pre-FROM default / --build-arg.
ARG CUDA_VERSION

# What this RUN does, in order:
#   1. upgrade the Python packaging tooling,
#   2. shallow-clone SGLang into ./sglang,
#   3. map CUDA_VERSION to the PyTorch wheel-index suffix (any other version
#      aborts the build),
#   4. install SGLang in editable mode with the extras named by BUILD_TYPE,
#   5. for CUDA 12.8.1 only, force-reinstall pinned NCCL and sgl-kernel wheels.
# The two installs in step 5 are chained with && (not ;) so that a failure of
# the first one fails the build instead of being hidden by the second.
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel html5lib six \
    && git clone --depth=1 https://github.com/sgl-project/sglang.git \
    && cd sglang \
    && case "$CUDA_VERSION" in \
         12.6.1) CUINDEX=126 ;; \
         12.8.1) CUINDEX=128 ;; \
         *) echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1 ;; \
       esac \
    && python3 -m pip install --no-cache-dir -e "python[${BUILD_TYPE}]" --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX} \
    && if [ "$CUDA_VERSION" = "12.8.1" ]; then \
         python3 -m pip install --no-cache-dir nvidia-nccl-cu12==2.27.3 --force-reinstall --no-deps \
         && python3 -m pip install --no-cache-dir https://github.com/sgl-project/whl/releases/download/v0.1.9/sgl_kernel-0.1.9+cu128-cp39-abi3-manylinux2014_x86_64.whl --force-reinstall --no-deps ; \
       fi

# Build and install NVSHMEM + DeepEP
# Steps performed by this single RUN, in order:
#   - download the NVSHMEM 3.2.5-1 source archive and clone DeepEP,
#   - unpack the archive and rename the extracted nvshmem_src directory to nvshmem,
#   - apply DeepEP's third-party/nvshmem.patch to the NVSHMEM tree,
#   - insert '#include <unistd.h>' as the first line of examples/moe_shuffle.cu,
#   - delete the downloaded archive,
#   - configure with cmake; the NVSHMEM_* assignments are environment variables
#     for that one cmake invocation, the install prefix is ${NVSHMEM_DIR} and
#     the CUDA architecture list is 90,
#   - build and install NVSHMEM, then pip-install DeepEP with NVSHMEM_DIR set.
# NOTE(review): the patch and archive-removal steps use absolute /sgl-workspace
# paths while the download/clone use relative ones, so this assumes the working
# directory is /sgl-workspace - confirm.
# NOTE(review): CMAKE_CUDA_ARCHITECTURES is fixed at 90 regardless of
# CUDA_VERSION / BUILD_TYPE - confirm that is intended for every image variant.
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.2.5/source/nvshmem_src_3.2.5-1.txz \
&& git clone https://github.com/deepseek-ai/DeepEP.git \
&& tar -xf nvshmem_src_3.2.5-1.txz && mv nvshmem_src nvshmem \
&& cd nvshmem \
&& git apply /sgl-workspace/DeepEP/third-party/nvshmem.patch \
&& sed -i '1i#include <unistd.h>' examples/moe_shuffle.cu \
&& rm -f /sgl-workspace/nvshmem_src_3.2.5-1.txz \
&& NVSHMEM_SHMEM_SUPPORT=0 \
NVSHMEM_UCX_SUPPORT=0 \
NVSHMEM_USE_NCCL=0 \
NVSHMEM_MPI_SUPPORT=0 \
NVSHMEM_IBGDA_SUPPORT=1 \
NVSHMEM_PMIX_SUPPORT=0 \
NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
NVSHMEM_USE_GDRCOPY=1 \
cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_DIR} -DCMAKE_CUDA_ARCHITECTURES=90 \
&& cmake --build build --target install -j \
&& cd /sgl-workspace/DeepEP \
&& NVSHMEM_DIR=${NVSHMEM_DIR} pip install .

ARG CUDA_VERSION
RUN python3 -m pip install --upgrade pip setuptools wheel html5lib six --break-system-packages --ignore-installed \
&& git clone --depth=1 https://github.com/sgl-project/sglang.git \
&& if [ "$CUDA_VERSION" = "12.1.1" ]; then \
export CUINDEX=121; \
elif [ "$CUDA_VERSION" = "12.4.1" ]; then \
export CUINDEX=124; \
elif [ "$CUDA_VERSION" = "12.8.1" ]; then \
export CUINDEX=124; \
elif [ "$CUDA_VERSION" = "11.8.0" ]; then \
export CUINDEX=118; \
python3 -m pip install --no-cache-dir sgl-kernel -i https://docs.sglang.ai/whl/cu118 --break-system-packages; \
else \
echo "Unsupported CUDA version: $CUDA_VERSION" && exit 1; \
fi \
&& if [ "$CUDA_VERSION" = "12.4.1" ]; then \
python3 -m pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cu126 --break-system-packages; \
else \
python3 -m pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cu${CUINDEX} --break-system-packages; \
fi \
&& cd sglang \
&& python3 -m pip --no-cache-dir install -e "python[${BUILD_TYPE}]" --break-system-packages \
&& if [ "$CUDA_VERSION" = "12.8.1" ]; then \
python3 -m pip install nvidia-nccl-cu12==2.26.2.post1 --force-reinstall --no-deps --break-system-packages; \
fi
# Additional Python packages installed on top of SGLang, one per line in
# alphabetical order. Only mooncake_transfer_engine carries a version pin;
# pip's download cache is disabled for this install.
RUN python3 -m pip install --no-cache-dir \
      black \
      datamodel_code_generator \
      icdiff \
      isort \
      mooncake_transfer_engine==0.3.3.post2 \
      pre-commit \
      pytest \
      scikit-build-core \
      uv \
      wheel

# Override the DEBIAN_FRONTEND=noninteractive value that was set for the build
# steps earlier in this file, so that setting is not left in effect for
# containers started from the image.
# NOTE(review): "interactive" does not appear to be one of the frontend names
# documented for debconf (dialog, readline, noninteractive, ...) - confirm the
# value behaves as intended, or whether the variable should simply be unset.
ENV DEBIAN_FRONTEND=interactive
Loading
Loading