From 710e1641424821f3f9857600d97f3a2f11c82adb Mon Sep 17 00:00:00 2001 From: zRzRzRzRzRzRzR <2448370773@qq.com> Date: Sun, 3 Aug 2025 20:30:40 +0800 Subject: [PATCH] fuse fp32 for glm-4.5 --- python/sglang/srt/models/glm4_moe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sglang/srt/models/glm4_moe.py b/python/sglang/srt/models/glm4_moe.py index badbb56ca861..76f954578ad3 100644 --- a/python/sglang/srt/models/glm4_moe.py +++ b/python/sglang/srt/models/glm4_moe.py @@ -343,7 +343,7 @@ def __init__( torch.empty((config.n_routed_experts, config.hidden_size)) ) self.e_score_correction_bias = nn.Parameter( - torch.empty((config.n_routed_experts)) + torch.empty((config.n_routed_experts), dtype=torch.float32) ) if _is_cpu and _is_cpu_amx_available: self.quant_method = PackWeightMethod(weight_names=["weight"])