sgl-project · zhyncs · Jul 20, 2025 · Jul 7, 2025 · Jul 9, 2025 · Jul 10, 2025
@@ -66,7 +66,7 @@ def transform_select_experts_inputs(
     info: Optional[ExpertLocationDispatchInfo],
 ):
     if (info is not None) and (info.ep_dispatch_algorithm == "fake"):
-        router_logits = torch.randn_like(router_logits)
+        router_logits.uniform_(5, 10)
         if correction_bias is not None:
             correction_bias = torch.zeros_like(correction_bias)
     return router_logits, correction_bias

@@ -320,7 +320,7 @@ def biased_grouped_topk_gpu(
         and is_power_of_two(correction_bias.shape[0])
     ):
         topk_weights, topk_ids = moe_fused_gate(
-            gating_output,
+            gating_output.to(dtype=torch.float32),
             correction_bias,
             num_expert_group,
             topk_group,

@@ -219,7 +219,7 @@ def __init__(
         )
         if config.topk_method == "noaux_tc":
             self.e_score_correction_bias = nn.Parameter(
-                torch.empty((config.n_routed_experts))
+                torch.empty((config.n_routed_experts), dtype=torch.float32)
             )
         else:
             self.e_score_correction_bias = None

diff --git a/python/sglang/srt/two_batch_overlap.py b/python/sglang/srt/two_batch_overlap.py
@@ -490,6 +490,7 @@ def filter_batch(
         output_dict["spec_info"] = output_spec_info
         for key in [
             "forward_mode",
+            "is_extend_in_batch",
             "return_logprob",
             "req_to_token_pool",
             "token_to_kv_pool",