File tree Expand file tree Collapse file tree
python/sglang/srt/layers/quantization Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1515if is_cuda ():
1616 import deep_gemm
1717 from deep_gemm import get_num_sms
18+ from deep_gemm .jit .compiler import get_nvcc_compiler
1819 from deep_gemm .jit_kernels .gemm import get_best_configs
1920 from deep_gemm .jit_kernels .runtime import FP8GemmRuntime , GemmType
2021 from deep_gemm .jit_kernels .tuner import jit_tuner
@@ -48,7 +49,17 @@ def get_enable_jit_deepgemm():
4849# Refer to https://github.com/deepseek-ai/DeepGEMM/commit/d75b218b7b8f4a5dd5406ac87905039ead3ae42f
4950# NVRTC may incur a performance loss in some cases.
5051# NVCC JIT compilation is also reported to be 9x faster in the referenced commit.
51- os .environ ["DG_JIT_USE_NVRTC" ] = os .getenv ("SGL_DG_USE_NVRTC" , "0" )
# Fallback value for DG_JIT_USE_NVRTC when SGL_DG_USE_NVRTC is not set.
# "0" is the normal default; it is switched to "1" (NVRTC) only when the
# NVCC probe below fails.
_USE_NVRTC_DEFAULT = "0"
if _ENABLE_JIT_DEEPGEMM:
    try:
        # Called only to probe for an NVCC toolchain; the result is unused.
        get_nvcc_compiler()
    except Exception:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit raised during the probe still
        # propagate instead of being mistaken for "NVCC not found".
        logger.warning(
            "NVCC Compiler not found, use NVRTC for DeepGEMM JIT "
            "and may have performance loss with some cases."
        )
        _USE_NVRTC_DEFAULT = "1"
# An explicit SGL_DG_USE_NVRTC setting always wins over the probed default.
os.environ["DG_JIT_USE_NVRTC"] = os.getenv("SGL_DG_USE_NVRTC", _USE_NVRTC_DEFAULT)
5263
5364
5465def update_deep_gemm_config (gpu_id : int , server_args : ServerArgs ):
You can’t perform that action at this time.
0 commit comments