We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 750838a, commit 465968b (Copy full SHA for 465968b)
1 file changed
python/sglang/srt/layers/moe/topk.py
@@ -524,7 +524,7 @@ def biased_grouped_topk_gpu(
524
topk_weights = torch.empty((token, topk), dtype=torch.float32, device=device)
525
topk_ids = torch.empty((token, topk), dtype=torch.int32, device=device)
526
aiter_biased_grouped_topk(
527
- gating_output,
+ gating_output.to(dtype=torch.float32),
528
correction_bias,
529
topk_weights,
530
topk_ids,
0 commit comments