1 file changed: +2 -2 lines changed
Original file line number | Diff line number | Diff line change
135135 VLLM_TPU_BUCKET_PADDING_GAP : int = 0
136136 VLLM_TPU_MOST_MODEL_LEN : Optional [int ] = None
137137 VLLM_TPU_USING_PATHWAYS : bool = False
138- VLLM_USE_DEEP_GEMM : bool = False
138+ VLLM_USE_DEEP_GEMM : bool = True
139139 VLLM_USE_DEEP_GEMM_E8M0 : bool = True
140140 VLLM_USE_DEEP_GEMM_E8M0_HOPPER : bool = False
141141 VLLM_SKIP_DEEP_GEMM_WARMUP : bool = False
@@ -1044,7 +1044,7 @@ def get_vllm_port() -> Optional[int]:
10441044
10451045 # Allow use of DeepGemm kernels for fused moe ops.
10461046 "VLLM_USE_DEEP_GEMM" :
1047- lambda : bool (int (os .getenv ("VLLM_USE_DEEP_GEMM" , "0 " ))),
1047+ lambda : bool (int (os .getenv ("VLLM_USE_DEEP_GEMM" , "1 " ))),
10481048
10491049 # Whether to use E8M0 scaling when DeepGEMM is used on Blackwell GPUs.
10501050 "VLLM_USE_DEEP_GEMM_E8M0" :
You can’t perform that action at this time.
0 commit comments