Skip to content

Commit 4ffff1d

Browse files
committed
remove per_channel_quant=True
Signed-off-by: Felix Marty <[email protected]>
1 parent 7334abc commit 4ffff1d

File tree

2 files changed

+0
-4
lines changed

2 files changed

+0
-4
lines changed

vllm/model_executor/layers/fused_moe/utils.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,6 @@ def _mxfp4_quantize(
8484
block_shape: Optional[list[int]] = None,
8585
) -> tuple[torch.Tensor, None]:
8686
assert block_shape is None
87-
assert per_act_token_quant
8887
if not current_platform.supports_mx():
8988
A = quant_dequant_mxfp4(A)
9089
else:

vllm/model_executor/layers/quantization/quark/quark_moe.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -388,8 +388,6 @@ def apply(
388388
scoring_func=scoring_func,
389389
e_score_correction_bias=e_score_correction_bias)
390390

391-
# We pass `per_channel_quant=True` as OCP MXFP4 quantization is a
392-
# per-token quantization scheme (with groups of `OCP_MX_BLOCK_SIZE`).
393391
out = fused_experts(
394392
x,
395393
layer.w13_weight,
@@ -406,7 +404,6 @@ def apply(
406404
a1_scale=None,
407405
a2_scale=None,
408406
block_shape=None,
409-
per_channel_quant=True,
410407
activation=activation,
411408
)
412409
return out

0 commit comments

Comments (0)