Skip to content

Commit 337b3c7

Browse files
alexm-redhat authored and LeiWang1999 committed
[Bugfix] Fix awq_marlin and gptq_marlin flags (vllm-project#6745)
Signed-off-by: LeiWang1999 <[email protected]>
1 parent 869d762 commit 337b3c7

File tree

2 files changed

+5
-3
lines changed

2 files changed

+5
-3
lines changed

vllm/model_executor/layers/quantization/awq_marlin.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ class AWQMarlinConfig(QuantizationConfig):
2525
def __init__(self, weight_bits: int, group_size: int, has_zp: bool,
2626
lm_head_quantized: bool) -> None:
2727
self.weight_bits = weight_bits
28-
self.pack_factor = 32 // self.weight_bits # packed into int32
28+
self.pack_factor = 32 // self.weight_bits # packed into 32bits
2929
self.group_size = group_size
3030
self.has_zp = has_zp
3131
self.lm_head_quantized = lm_head_quantized
@@ -69,7 +69,8 @@ def from_config(cls, config: Dict[str, Any]) -> "AWQMarlinConfig":
6969
def override_quantization_method(cls, hf_quant_cfg,
7070
user_quant) -> Optional[str]:
7171
can_convert = cls.is_awq_marlin_compatible(hf_quant_cfg)
72-
is_valid_user_quant = (user_quant is None or user_quant == "marlin")
72+
is_valid_user_quant = (user_quant is None or user_quant == "marlin"
73+
or user_quant == "awq_marlin")
7374

7475
if can_convert and is_valid_user_quant:
7576
msg = ("The model is convertible to {} during runtime."

vllm/model_executor/layers/quantization/gptq_marlin.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,8 @@ def override_quantization_method(cls, hf_quant_cfg,
7979
user_quant) -> Optional[str]:
8080
can_convert = cls.is_gptq_marlin_compatible(hf_quant_cfg)
8181

82-
is_valid_user_quant = (user_quant is None or user_quant == "marlin")
82+
is_valid_user_quant = (user_quant is None or user_quant == "marlin"
83+
or user_quant == "gptq_marlin")
8384

8485
if can_convert and is_valid_user_quant:
8586
msg = ("The model is convertible to {} during runtime."

0 commit comments

Comments
 (0)