Skip to content

Commit d6c7e53

Browse files
bnellnm authored and xuebwang-amd committed
[Kernels] Enable DeepGEMM by default (vllm-project#24462)
Signed-off-by: Bill Nell <[email protected]> Signed-off-by: xuebwang-amd <[email protected]>
1 parent 1c971cd commit d6c7e53

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

vllm/envs.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -135,7 +135,7 @@
135135
VLLM_TPU_BUCKET_PADDING_GAP: int = 0
136136
VLLM_TPU_MOST_MODEL_LEN: Optional[int] = None
137137
VLLM_TPU_USING_PATHWAYS: bool = False
138-
VLLM_USE_DEEP_GEMM: bool = False
138+
VLLM_USE_DEEP_GEMM: bool = True
139139
VLLM_USE_DEEP_GEMM_E8M0: bool = True
140140
VLLM_USE_DEEP_GEMM_E8M0_HOPPER: bool = False
141141
VLLM_SKIP_DEEP_GEMM_WARMUP: bool = False
@@ -1044,7 +1044,7 @@ def get_vllm_port() -> Optional[int]:
10441044

10451045
# Allow use of DeepGemm kernels for fused moe ops.
10461046
"VLLM_USE_DEEP_GEMM":
1047-
lambda: bool(int(os.getenv("VLLM_USE_DEEP_GEMM", "0"))),
1047+
lambda: bool(int(os.getenv("VLLM_USE_DEEP_GEMM", "1"))),
10481048

10491049
# Whether to use E8M0 scaling when DeepGEMM is used on Blackwell GPUs.
10501050
"VLLM_USE_DEEP_GEMM_E8M0":

0 commit comments

Comments
 (0)