Commit 313c5a3

lint

Signed-off-by: jiahanc <[email protected]>
1 parent 7614946, commit 313c5a3

File tree: 1 file changed, +1 -3 lines changed

vllm/model_executor/layers/quantization/modelopt.py

Lines changed: 1 addition & 3 deletions
@@ -1489,7 +1489,6 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
             )
         else:
             w13_input_scale = layer.w13_input_scale.max(dim=1).values.to(torch.float32)
-
         layer.g1_alphas = Parameter(
             (w13_input_scale * w13_weight_scale_2).to(torch.float32),
             requires_grad=False,
@@ -1499,6 +1498,7 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         layer.w13_input_scale_quant = Parameter(
             (1 / w13_input_scale).to(torch.float32), requires_grad=False
         )
+
         # GEMM 2 processing
         if use_global_sf:
             # For backends provide by Flashinfer, the input global scales are
@@ -1508,7 +1508,6 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
             )
         else:
             w2_input_scale = layer.w2_input_scale
-
         layer.g2_alphas = Parameter(
             (w2_input_scale * layer.w2_weight_scale_2).to(torch.float32),
             requires_grad=False,
@@ -1642,7 +1641,6 @@ def apply(
             from vllm.model_executor.models.llama4 import Llama4MoE

             a1_gscale = layer.w13_input_scale_quant
-
             (hidden_states_fp4, hidden_states_scale_linear_fp4) = (
                 flashinfer.fp4_quantize(
                     x,
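
For context, the unchanged lines around this lint fix compute the per-expert global scales consumed by the fused NVFP4 MoE path: g1_alphas and g2_alphas are products of the input and weight global scales for GEMM 1 and GEMM 2, and w13_input_scale_quant is the reciprocal input scale applied when activations are quantized to FP4. Below is a minimal standalone sketch of that scale processing; the names w13_input_scale and w13_weight_scale_2 come from the diff, while the tensor shapes and everything else are illustrative assumptions, not vLLM's actual module state.

import torch
from torch.nn import Parameter

# Illustrative shapes (assumptions): 4 experts, one input-scale entry each
# for the fused w1 and w3 (gate/up) projections.
num_experts = 4
w13_input_scale = torch.rand(num_experts, 2)   # per-expert input scales
w13_weight_scale_2 = torch.rand(num_experts)   # per-expert global weight scales

# Collapse the w1/w3 entries to one scale per expert, as in the diff context.
w13_input_scale = w13_input_scale.max(dim=1).values.to(torch.float32)

# GEMM 1 alpha: product of the input and weight global scales.
g1_alphas = Parameter(
    (w13_input_scale * w13_weight_scale_2).to(torch.float32),
    requires_grad=False,
)

# Reciprocal scale used when quantizing activations to FP4 before GEMM 1.
w13_input_scale_quant = Parameter(
    (1 / w13_input_scale).to(torch.float32),
    requires_grad=False,
)

print(g1_alphas.shape, w13_input_scale_quant.shape)  # both torch.Size([4])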
