From b1952dc995c58f6093f84df202feb339496ed28a Mon Sep 17 00:00:00 2001
From: "beihai.wgw"
Date: Wed, 24 Sep 2025 16:28:19 +0800
Subject: [PATCH] fix bailing_moe hang with enable_dp_attention

---
 python/sglang/srt/models/bailing_moe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/sglang/srt/models/bailing_moe.py b/python/sglang/srt/models/bailing_moe.py
index 0797f4f6fac8..2d1929ead001 100644
--- a/python/sglang/srt/models/bailing_moe.py
+++ b/python/sglang/srt/models/bailing_moe.py
@@ -45,12 +45,12 @@
     get_attention_dp_size,
     get_attention_tp_rank,
     get_attention_tp_size,
+    is_dp_attention_enabled,
 )
 from sglang.srt.layers.layernorm import RMSNorm
 from sglang.srt.layers.linear import (
     MergedColumnParallelLinear,
     QKVParallelLinear,
-    ReplicatedLinear,
     RowParallelLinear,
 )
 from sglang.srt.layers.logits_processor import LogitsProcessor
@@ -702,7 +702,7 @@ def __init__(
                 self.embed_dim,
                 quant_config=quant_config,
                 prefix=add_prefix("word_embeddings", prefix),
-                use_attn_tp_group=global_server_args_dict["enable_dp_lm_head"],
+                enable_tp=not is_dp_attention_enabled(),
             )
         else:
             self.word_embeddings = PPMissingLayer()