We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 90a01bb commit 7334abc Copy full SHA for 7334abc
vllm/model_executor/layers/quantization/quark/quark_moe.py
@@ -388,6 +388,8 @@ def apply(
388
scoring_func=scoring_func,
389
e_score_correction_bias=e_score_correction_bias)
390
391
+ # We pass `per_channel_quant=True` as OCP MXFP4 quantization is a
392
+ # per-token quantization scheme (with groups of `OCP_MX_BLOCK_SIZE`).
393
out = fused_experts(
394
x,
395
layer.w13_weight,
@@ -405,5 +407,6 @@ def apply(
405
407
a2_scale=None,
406
408
block_shape=None,
409
per_channel_quant=True,
410
+ activation=activation,
411
)
412
return out
0 commit comments