Dockerfile: replace the deadsnakes/get-pip Python bootstrap with a uv-managed venv.

Summary of what this patch changes (text before the "diff --git" line is
ignored by patch tools):
  * Both apt-get setup steps stop installing software-properties-common,
    the deadsnakes PPA, python${PYTHON_VERSION}* packages and get-pip, and
    instead install uv through the astral.sh install script.
  * /root/.local/bin is put on PATH so the uv binary is found, then
    `uv venv --python ${PYTHON_VERSION} --seed` creates /opt/venv, which is
    placed at the front of PATH.
  * Every `uv pip install --system ...` becomes `uv pip install ...`, so
    packages land in the venv rather than the system interpreter.
  * The GIT_REPO_CHECK test compares against the quoted string "0".
  * The non-sccache wheel build removes and recreates `.deps` first.
  * The FlashInfer install step no longer sources /etc/environment.

diff --git a/Dockerfile b/Dockerfile
index 63314b906f15..7c7aa507ec97 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,22 +14,19 @@ ARG PYTHON_VERSION=3.12
 ARG TARGETPLATFORM
 ENV DEBIAN_FRONTEND=noninteractive
 
-# Install Python and other dependencies
+# Install minimal dependencies and uv
 RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
     && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
     && apt-get update -y \
-    && apt-get install -y ccache software-properties-common git curl sudo \
-    && add-apt-repository ppa:deadsnakes/ppa \
-    && apt-get update -y \
-    && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \
-    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
-    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
-    && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
-    && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \
-    && python3 --version && python3 -m pip --version
-# Install uv for faster pip installs
-RUN --mount=type=cache,target=/root/.cache/uv \
-    python3 -m pip install uv
+    && apt-get install -y ccache git curl wget sudo \
+    && curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# Add uv to PATH
+ENV PATH="/root/.local/bin:$PATH"
+# Create venv with specified Python and activate by placing at the front of path
+ENV VIRTUAL_ENV="/opt/venv"
+RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
 # Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519
 # as it was causing spam when compiling the CUTLASS kernels
@@ -47,21 +44,19 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
 
 WORKDIR /workspace
 
-# install build and runtime dependencies
-
 # arm64 (GH200) build follows the practice of "use existing pytorch" build,
 # we need to install torch and torchvision from the nightly builds first,
 # pytorch will not appear as a vLLM dependency in all of the following steps
 # after this step
 RUN --mount=type=cache,target=/root/.cache/uv \
     if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
-        uv pip install --system --index-url https://download.pytorch.org/whl/nightly/cu126 "torch==2.7.0.dev20250121+cu126" "torchvision==0.22.0.dev20250121"; \
+        uv pip install --index-url https://download.pytorch.org/whl/nightly/cu126 "torch==2.7.0.dev20250121+cu126" "torchvision==0.22.0.dev20250121"; \
     fi
 
 COPY requirements-common.txt requirements-common.txt
 COPY requirements-cuda.txt requirements-cuda.txt
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system -r requirements-cuda.txt
+    uv pip install -r requirements-cuda.txt
 
 # cuda arch list used by torch
 # can be useful for both `dev` and `test`
@@ -82,12 +77,12 @@ ARG TARGETPLATFORM
 COPY requirements-build.txt requirements-build.txt
 
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system -r requirements-build.txt
+    uv pip install -r requirements-build.txt
 
 COPY . .
 ARG GIT_REPO_CHECK=0
 RUN --mount=type=bind,source=.git,target=.git \
-    if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi
+    if [ "$GIT_REPO_CHECK" != "0" ]; then bash tools/check_repo.sh ; fi
 
 # max jobs used by Ninja to build extensions
 ARG max_jobs=2
@@ -124,6 +119,9 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=cache,target=/root/.cache/uv \
     --mount=type=bind,source=.git,target=.git \
     if [ "$USE_SCCACHE" != "1" ]; then \
+        # Clean any existing CMake artifacts
+        rm -rf .deps && \
+        mkdir -p .deps && \
         python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
     fi
 
@@ -147,7 +145,7 @@ COPY requirements-lint.txt requirements-lint.txt
 COPY requirements-test.txt requirements-test.txt
 COPY requirements-dev.txt requirements-dev.txt
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system -r requirements-dev.txt
+    uv pip install -r requirements-dev.txt
 #################### DEV IMAGE ####################
 
 #################### vLLM installation IMAGE ####################
@@ -163,23 +161,20 @@ ARG TARGETPLATFORM
 RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \
     echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment
 
-# Install Python and other dependencies
+# Install minimal dependencies and uv
 RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
     && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
     && apt-get update -y \
-    && apt-get install -y ccache software-properties-common git curl wget sudo vim python3-pip \
-    && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
-    && add-apt-repository ppa:deadsnakes/ppa \
-    && apt-get update -y \
-    && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv libibverbs-dev \
-    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
-    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
-    && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
-    && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \
-    && python3 --version && python3 -m pip --version
-# Install uv for faster pip installs
-RUN --mount=type=cache,target=/root/.cache/uv \
-    python3 -m pip install uv
+    && apt-get install -y ccache git curl wget sudo vim \
+    && apt-get install -y ffmpeg libsm6 libxext6 libgl1 libibverbs-dev \
+    && curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# Add uv to PATH
+ENV PATH="/root/.local/bin:$PATH"
+# Create venv with specified Python and activate by placing at the front of path
+ENV VIRTUAL_ENV="/opt/venv"
+RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
 # Workaround for https://github.com/openai/triton/issues/2507 and
 # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
@@ -193,13 +188,13 @@ RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
 # after this step
 RUN --mount=type=cache,target=/root/.cache/uv \
     if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
-        uv pip install --system --index-url https://download.pytorch.org/whl/nightly/cu124 "torch==2.6.0.dev20241210+cu124" "torchvision==0.22.0.dev20241215"; \
+        uv pip install --index-url https://download.pytorch.org/whl/nightly/cu124 "torch==2.6.0.dev20241210+cu124" "torchvision==0.22.0.dev20241215"; \
     fi
 
 # Install vllm wheel first, so that torch etc will be installed.
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
     --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system dist/*.whl --verbose
+    uv pip install dist/*.whl --verbose
 
 # If we need to build FlashInfer wheel before its release:
 # $ export FLASHINFER_ENABLE_AOT=1
@@ -214,9 +209,8 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
 # $ # upload the wheel to a public location, e.g. https://wheels.vllm.ai/flashinfer/524304395bd1d8cd7d07db083859523fcaa246a4/flashinfer_python-0.2.1.post1+cu124torch2.5-cp38-abi3-linux_x86_64.whl
 
 RUN --mount=type=cache,target=/root/.cache/uv \
-. /etc/environment && \
 if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
-    uv pip install --system https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post1/flashinfer_python-0.2.1.post1+cu124torch2.5-cp38-abi3-linux_x86_64.whl ; \
+    uv pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.1.post1/flashinfer_python-0.2.1.post1+cu124torch2.5-cp38-abi3-linux_x86_64.whl ; \
 fi
 COPY examples examples
 
@@ -226,7 +220,7 @@ COPY examples examples
 # TODO: Remove this once FlashInfer AOT wheel is fixed
 COPY requirements-build.txt requirements-build.txt
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system -r requirements-build.txt
+    uv pip install -r requirements-build.txt
 
 #################### vLLM installation IMAGE ####################
 
@@ -239,15 +233,15 @@ ADD . /vllm-workspace/
 
 # install development dependencies (for testing)
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system -r requirements-dev.txt
+    uv pip install -r requirements-dev.txt
 
 # install development dependencies (for testing)
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system -e tests/vllm_test_utils
+    uv pip install -e tests/vllm_test_utils
 
 # enable fast downloads from hf (for testing)
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system hf_transfer
+    uv pip install hf_transfer
 ENV HF_HUB_ENABLE_HF_TRANSFER 1
 
 # Copy in the v1 package for testing (it isn't distributed yet)
@@ -268,9 +262,9 @@ FROM vllm-base AS vllm-openai-base
 # install additional dependencies for openai api server
 RUN --mount=type=cache,target=/root/.cache/uv \
     if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
-        uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
+        uv pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
     else \
-        uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.0' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
+        uv pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.0' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
     fi
 
 ENV VLLM_USAGE_SOURCE production-docker-image