Skip to content

Commit 60c1b80

Browse files
LucasWilkinson authored and pathorn committed
[Kernel] Update cutlass_scaled_mm to support 2d group (blockwise) scaling (vllm-project#11868)
1 parent d3939af commit 60c1b80

25 files changed

+1924
-346
lines changed

CMakeLists.txt

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -245,7 +245,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
245245
FetchContent_Declare(
246246
cutlass
247247
GIT_REPOSITORY https://github.com/nvidia/cutlass.git
248-
GIT_TAG v3.6.0
248+
GIT_TAG v3.7.0
249249
GIT_PROGRESS TRUE
250250

251251
# Speed up CUTLASS download by retrieving only the specified GIT_TAG instead of the history.
@@ -299,7 +299,12 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
299299
# CUDA 12.0 or later (and only work on Hopper, 9.0a for now).
300300
cuda_archs_loose_intersection(SCALED_MM_3X_ARCHS "9.0a" "${CUDA_ARCHS}")
301301
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.0 AND SCALED_MM_3X_ARCHS)
302-
set(SRCS "csrc/quantization/cutlass_w8a8/scaled_mm_c3x.cu")
302+
set(SRCS
303+
"csrc/quantization/cutlass_w8a8/scaled_mm_c3x.cu"
304+
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm90_fp8.cu"
305+
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm90_int8.cu"
306+
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_azp_sm90_int8.cu"
307+
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_blockwise_sm90_fp8.cu")
303308
set_gencode_flags_for_srcs(
304309
SRCS "${SRCS}"
305310
CUDA_ARCHS "${SCALED_MM_3X_ARCHS}")

0 commit comments

Comments
 (0)