vllm/model_executor/layers/quantization (1 file changed, +1 −3)

@@ -1489,7 +1489,6 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
             )
         else:
             w13_input_scale = layer.w13_input_scale.max(dim=1).values.to(torch.float32)
-
         layer.g1_alphas = Parameter(
             (w13_input_scale * w13_weight_scale_2).to(torch.float32),
             requires_grad=False,
@@ -1499,6 +1498,7 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         layer.w13_input_scale_quant = Parameter(
             (1 / w13_input_scale).to(torch.float32), requires_grad=False
         )
+
         # GEMM 2 processing
         if use_global_sf:
             # For backends provide by Flashinfer, the input global scales are
@@ -1508,7 +1508,6 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
             )
         else:
             w2_input_scale = layer.w2_input_scale
-
         layer.g2_alphas = Parameter(
             (w2_input_scale * layer.w2_weight_scale_2).to(torch.float32),
             requires_grad=False,
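
For context on what these hunks touch: `g1_alphas`/`g2_alphas` fold the per-expert input and weight global scales into a single dequant alpha, while `w13_input_scale_quant` stores the reciprocal input scale used when quantizing activations. A minimal sketch of that scale bookkeeping follows; the tensor names and shapes here are assumptions for illustration, not the vLLM implementation:

```python
import torch
from torch.nn import Parameter

num_experts = 4
# Hypothetical per-expert global scales, standing in for the
# checkpoint-loaded ones (one column per gate/up projection).
w13_input_scale = torch.rand(num_experts, 2)
w13_weight_scale_2 = torch.rand(num_experts)

# Collapse gate/up into a single per-expert input scale, as in the hunk above.
s_in = w13_input_scale.max(dim=1).values.to(torch.float32)

# GEMM1 dequant alpha: product of input and weight global scales.
g1_alphas = Parameter((s_in * w13_weight_scale_2).to(torch.float32),
                      requires_grad=False)

# Reciprocal scale applied to activations before FP4 quantization.
w13_input_scale_quant = Parameter((1 / s_in).to(torch.float32),
                                  requires_grad=False)
```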
@@ -1642,7 +1641,6 @@ def apply(
         from vllm.model_executor.models.llama4 import Llama4MoE
 
         a1_gscale = layer.w13_input_scale_quant
-
         (hidden_states_fp4, hidden_states_scale_linear_fp4) = (
             flashinfer.fp4_quantize(
                 x,
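
The `apply` hunk sits just before the activation-quantization call. A conceptual, pure-PyTorch sketch of global-scale quantization is shown below; it only emulates the idea and is not flashinfer's `fp4_quantize` kernel, which also emits per-block scale factors and packs values into FP4 codes:

```python
import torch

FP4_MAX = 6.0  # largest magnitude representable in e2m1 FP4

def quantize_with_global_scale(x: torch.Tensor, a1_gscale: torch.Tensor) -> torch.Tensor:
    # a1_gscale is the precomputed reciprocal input scale (1 / input_scale),
    # mirroring w13_input_scale_quant above.
    x_scaled = x.to(torch.float32) * a1_gscale
    # Clamp to the FP4 dynamic range; real kernels round to e2m1 codes.
    return x_scaled.clamp(-FP4_MAX, FP4_MAX)

x = torch.randn(8, 16)
print(quantize_with_global_scale(x, torch.tensor(0.5)).shape)  # torch.Size([8, 16])
```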