From 6b3fbfb8057f7f38c9c8ec0d1acfd4edb919cca5 Mon Sep 17 00:00:00 2001
From: Isotr0py
Date: Mon, 1 Sep 2025 13:40:03 +0800
Subject: [PATCH] fully enable v1 on turing

Signed-off-by: Isotr0py
---
 vllm/engine/arg_utils.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 06bd97dd6abe..5c49508b6d1f 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1432,17 +1432,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool:
                 recommend_to_remove=True)
             return False
 
-        # Triton v3.3 has f16 conversion regression issue on Turing and Volta,
-        # which broke fp16 inference
-        # see: https://github.com/triton-lang/triton/issues/6698
-        if (current_platform.is_cuda()
-                and not current_platform.has_device_capability(80)
-                and model_config.dtype == torch.float16):
-            _raise_or_fallback(
-                feature_name="Compute Capability < 8.0 with FP16",
-                recommend_to_remove=False)
-            return False
-
         if self.kv_cache_dtype != "auto":
             supported = current_platform.is_kv_cache_dtype_supported(
                 self.kv_cache_dtype, model_config)
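
Note (not part of the patch): with this guard removed, fp16 models on pre-Ampere CUDA GPUs (compute capability < 8.0, e.g. Turing/Volta) should run under the V1 engine instead of being rejected by _is_v1_supported_oracle. A minimal smoke test along these lines could confirm the behavior on a Turing card such as a T4; the model id and prompt are placeholders chosen only for illustration, not anything referenced by the patch:

    # Hypothetical smoke test, not part of this patch: load a small model in fp16
    # on a Turing GPU and print one greedy completion to verify V1 accepts it.
    from vllm import LLM, SamplingParams

    llm = LLM(model="facebook/opt-125m", dtype="float16")  # placeholder model id
    params = SamplingParams(temperature=0.0, max_tokens=16)
    outputs = llm.generate(["Hello, my name is"], params)
    print(outputs[0].outputs[0].text)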