File tree Expand file tree Collapse file tree
python/sglang/srt/layers/moe/fused_moe_triton Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -750,9 +750,11 @@ def moe_align_block_size(
750 750   by block_size for proper block matrix operations.
751 751   """
752 752   max_num_tokens_padded = topk_ids.numel() + num_experts * (block_size - 1)
753     - sorted_ids, cumsum_buffer = init_sorted_ids_and_cumsum_buffer(
754     - max_num_tokens_padded, topk_ids.numel(), num_experts, topk_ids.device
    753 + sorted_ids = torch.empty(
    754 + (max_num_tokens_padded,), dtype=torch.int32, device=topk_ids.device
755 755   )
    756 + sorted_ids.fill_(topk_ids.numel())
    757 +
756 758   max_num_m_blocks = triton.cdiv(max_num_tokens_padded, block_size)
757 759   expert_ids = torch.empty(
758 760   (max_num_m_blocks,), dtype=torch.int32, device=topk_ids.device
@@ -768,6 +770,9 @@ def moe_align_block_size(
768 770   num_tokens_post_pad,
769 771   )
770 772   else:
    773 + cumsum_buffer = torch.empty(
    774 + (num_experts + 1,), dtype=torch.int32, device=topk_ids.device
    775 + )
771 776   token_cnts_buffer = torch.empty(
772 777   (num_experts + 1) * num_experts,
773 778   dtype=torch.int32,
You can’t perform that action at this time.
0 commit comments