Pass the `activation` argument through to `fused_experts` in Quark MoE `apply`

fxmarty-amd · fxmarty-amd · commit 7334abcfda88 · 2025-07-09T12:42:08.000+02:00
Signed-off-by: Felix Marty <Felix.Marty@amd.com>
diff --git a/vllm/model_executor/layers/quantization/quark/quark_moe.py b/vllm/model_executor/layers/quantization/quark/quark_moe.py
@@ -388,6 +388,8 @@ def apply(
             scoring_func=scoring_func,
             e_score_correction_bias=e_score_correction_bias)
 
+        # We pass `per_channel_quant=True` as OCP MXFP4 quantization is a
+        # per-token quantization scheme (with groups of `OCP_MX_BLOCK_SIZE`).
         out = fused_experts(
             x,
             layer.w13_weight,
@@ -405,5 +407,6 @@ def apply(
             a2_scale=None,
             block_shape=None,
             per_channel_quant=True,
+            activation=activation,
         )
         return out