
Commit b98cbdb

jiahanc committed
format
Signed-off-by: jiahanc <[email protected]>
1 parent 5398428 commit b98cbdb

3 files changed: +2 −11 lines changed


vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py

Lines changed: 0 additions & 1 deletion
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-
 import torch
 
 from vllm.model_executor.layers.fused_moe.utils import moe_kernel_quantize_input

vllm/model_executor/layers/quantization/fp8.py

Lines changed: 1 addition & 0 deletions
@@ -1291,6 +1291,7 @@ def apply(
         # can override fused_experts or cutlass but not rocm or marlin.
         #
         topk_weights, topk_ids, zero_expert_result = select_result
+
         if self.rocm_aiter_moe_enabled:
             from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import ( # noqa: E501
                 rocm_aiter_fused_experts,
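
The comment kept in this hunk describes backend precedence: an override may replace the default fused_experts/cutlass path, but the ROCm AITER (and Marlin) paths are selected first. Below is a minimal, runnable sketch of that dispatch order; every name in it (run_experts, the string results, the override callable) is invented for illustration and is not vLLM's API.

from typing import Callable, Optional


def run_experts(select_result, rocm_aiter_enabled: bool,
                override: Optional[Callable] = None) -> str:
    # Routing results are unpacked first, as in the hunk above.
    topk_weights, topk_ids, zero_expert_result = select_result
    if rocm_aiter_enabled:
        # A user override cannot shadow the backend-specific path.
        return "rocm_aiter_fused_experts"
    if override is not None:
        return override(topk_weights, topk_ids)
    return "default fused_experts / cutlass"


print(run_experts(([1.0], [0], None), rocm_aiter_enabled=True))
print(run_experts(([1.0], [0], None), rocm_aiter_enabled=False,
                  override=lambda w, i: "custom experts impl"))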

vllm/model_executor/models/qwen3_next.py

Lines changed: 1 addition & 10 deletions
@@ -181,10 +181,10 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         orig_shape = hidden_states.shape
         num_tokens, hidden_dim = hidden_states.shape
         hidden_states = hidden_states.view(-1, hidden_dim)
+
         if self.is_sequence_parallel:
             hidden_states = sequence_parallel_chunk(hidden_states)
 
-<<<<<<< HEAD
         if self.experts.is_internal_router:
             # In this case, the gate/router runs inside the FusedMoE class
             final_hidden_states = self.experts(
@@ -196,14 +196,6 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
             final_hidden_states = self.experts(
                 hidden_states=hidden_states, router_logits=router_logits
             )
-=======
-        # print(self.prefix_print)
-        # router_logits: (num_tokens, n_experts)
-        router_logits, _ = self.gate(hidden_states)
-        final_hidden_states = self.experts(
-            hidden_states=hidden_states, router_logits=router_logits
-        )
->>>>>>> 9d88f1762 (update work)
 
         if self.shared_expert is not None:
             final_hidden_states = final_hidden_states[0] + final_hidden_states[1]
@@ -1013,7 +1005,6 @@ def forward(
             {"hidden_states": hidden_states, "residual": residual}
         )
         hidden_states, _ = self.norm(hidden_states, residual)
-        # print("="*60)
         return hidden_states
 
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
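
The qwen3_next.py changes drop leftover merge-conflict markers and commented-out debug prints, keeping the HEAD branch in which the gate/router may run inside the FusedMoE layer. The toy below sketches that dispatch pattern under stated assumptions: when the experts layer routes internally it is called with hidden_states only; otherwise router logits of shape (num_tokens, n_experts) are computed by an external gate and passed along. ToyExperts and ToyMoeBlock are stand-ins made up for this sketch, not vLLM classes.

from typing import Optional

import torch
import torch.nn as nn


class ToyExperts(nn.Module):
    def __init__(self, hidden_dim: int, is_internal_router: bool):
        super().__init__()
        self.is_internal_router = is_internal_router
        self.proj = nn.Linear(hidden_dim, hidden_dim)

    def forward(self, hidden_states: torch.Tensor,
                router_logits: Optional[torch.Tensor] = None) -> torch.Tensor:
        # A real FusedMoE would route tokens to experts here (using
        # router_logits or its internal gate); the toy just projects.
        return self.proj(hidden_states)


class ToyMoeBlock(nn.Module):
    def __init__(self, hidden_dim: int, n_experts: int, internal_router: bool):
        super().__init__()
        self.experts = ToyExperts(hidden_dim, internal_router)
        self.gate = nn.Linear(hidden_dim, n_experts, bias=False)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        num_tokens, hidden_dim = hidden_states.shape
        hidden_states = hidden_states.view(-1, hidden_dim)
        if self.experts.is_internal_router:
            # Kept HEAD branch: the gate/router runs inside the experts layer.
            return self.experts(hidden_states=hidden_states)
        # External-router path: router_logits is (num_tokens, n_experts).
        router_logits = self.gate(hidden_states)
        return self.experts(hidden_states=hidden_states,
                            router_logits=router_logits)


x = torch.randn(4, 8)
print(ToyMoeBlock(8, 2, internal_router=True)(x).shape)   # torch.Size([4, 8])
print(ToyMoeBlock(8, 2, internal_router=False)(x).shape)  # torch.Size([4, 8])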
