File tree: 1 file changed, +3 -1 lines changed

Original file line number | Diff line number | Diff line change
 11  11   from vllm.attention.selector import backend_name_to_enum, get_attn_backend
 12  12   from vllm.config import CacheConfig, get_current_vllm_config
 13  13   from vllm.forward_context import ForwardContext, get_forward_context
     14 + from vllm.model_executor.layers.linear import UnquantizedLinearMethod
 14  15   from vllm.model_executor.layers.quantization.base_config import (
 15  16       QuantizationConfig)
 16  17   from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod
@@ -97,7 +98,8 @@ def __init__(
 97  98
 98  99           quant_method = quant_config.get_quant_method(
 99 100               self, prefix=prefix) if quant_config else None
100     -         if quant_method is not None:
    101 +         if quant_method is not None and not isinstance(
    102 +                 quant_method, UnquantizedLinearMethod):
101 103               assert isinstance(quant_method, BaseKVCacheMethod)
102 104               # TODO (mgoin): kv cache dtype should be specified in the FP8
103 105               # checkpoint config and become the "auto" behavior
You can’t perform that action at this time.
0 commit comments