From 12ed13ed570ebcfb2a9a0a41191b0424643fa4f3 Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Thu, 22 May 2025 20:03:17 +0000 Subject: [PATCH 1/3] re-enable blackwell Signed-off-by: Lucas Wilkinson --- CMakeLists.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ffb801d62619..a6c54be9530b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,11 +30,7 @@ set(ignoreMe "${VLLM_PYTHON_PATH}") set(PYTHON_SUPPORTED_VERSIONS "3.9" "3.10" "3.11" "3.12") # Supported NVIDIA architectures. -if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL) - set(CUDA_SUPPORTED_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0;10.0;10.1;12.0") -else() - set(CUDA_SUPPORTED_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0") -endif() +set(CUDA_SUPPORTED_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0;10.0;10.1;12.0") # Supported AMD GPU architectures. set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1200;gfx1201") From c01303a350bacc80970a9ae5e700a07ce36ec31b Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Fri, 23 May 2025 02:24:02 +0000 Subject: [PATCH 2/3] make `TORCH_CUDA_ARCH_LIST` global for the docker file Signed-off-by: Lucas Wilkinson --- docker/Dockerfile | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index cc3499d1f0a9..2a1e4fa10009 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -6,6 +6,14 @@ # docs/source/assets/contributing/dockerfile-stages-dependency.png ARG CUDA_VERSION=12.8.1 +# cuda arch list used by torch +# can be useful for both `dev` and `test` +# explicitly set the list to avoid issues with torch 2.2 +# see https://github.com/pytorch/pytorch/pull/123243 +# Also allow explicitly setting for cuda-11.8 +ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0+PTX' +ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list} + #################### BASE BUILD IMAGE #################### # prepare basic build environment FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 AS base @@ -73,12 +81,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system -r requirements/cuda.txt \ --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') -# cuda arch list used by torch -# can be useful for both `dev` and `test` -# explicitly set the list to avoid issues with torch 2.2 -# see https://github.com/pytorch/pytorch/pull/123243 -ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0+PTX' -ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list} # Override the arch list for flash-attn to reduce the binary size ARG vllm_fa_cmake_gpu_arches='80-real;90-real' ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches} From 6bc924c6bd9755b49304e05802693cb1bb7bd7ab Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Fri, 23 May 2025 02:31:54 +0000 Subject: [PATCH 3/3] just duplicate Signed-off-by: Lucas Wilkinson --- docker/Dockerfile | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 2a1e4fa10009..e75824611976 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -6,13 +6,6 @@ # docs/source/assets/contributing/dockerfile-stages-dependency.png ARG CUDA_VERSION=12.8.1 -# cuda arch list used by torch -# can be useful for both `dev` and `test` -# explicitly set the list to avoid issues with torch 2.2 -# see https://github.com/pytorch/pytorch/pull/123243 -# Also allow explicitly setting for cuda-11.8 -ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0+PTX' -ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list} #################### BASE BUILD IMAGE #################### # prepare basic build environment @@ -81,6 +74,13 @@ RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system -r requirements/cuda.txt \ --extra-index-url https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') +# cuda arch list used by torch +# can be useful for both `dev` and `test` +# explicitly set the list to avoid issues with torch 2.2 +# see https://github.com/pytorch/pytorch/pull/123243 +# Also allow explicitly setting for cuda-11.8 +ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0+PTX' +ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list} # Override the arch list for flash-attn to reduce the binary size ARG vllm_fa_cmake_gpu_arches='80-real;90-real' ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches} @@ -191,6 +191,15 @@ WORKDIR /vllm-workspace ENV DEBIAN_FRONTEND=noninteractive ARG TARGETPLATFORM +# TODO: Can be removed once vllm-base starts from the base image +# cuda arch list used by torch +# can be useful for both `dev` and `test` +# explicitly set the list to avoid issues with torch 2.2 +# see https://github.com/pytorch/pytorch/pull/123243 +# Also allow explicitly setting for cuda-11.8 +ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0+PTX' +ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list} + SHELL ["/bin/bash", "-c"] RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \