@@ -20,7 +20,6 @@ __global__ void expandInputRowsKernel(
2020 int expert_id = sorted_experts[expanded_dest_row];
2121
2222 extern __shared__ int64_t smem_expert_first_token_offset[];
23- int64_t align_expanded_row_accumulate = 0 ;
2423 if constexpr (ALIGN_BLOCK_SIZE) {
2524 // load g2s
2625 for (int idx = threadIdx.x ; idx < num_local_experts + 1 ;
@@ -63,7 +62,6 @@ __global__ void expandInputRowsKernel(
6362 using DataElem = cutlass::Array<T, ELEM_PER_THREAD>;
6463
6564 // Duplicate and permute rows
66- int64_t const source_k_rank = expanded_source_row / num_rows;
6765 int64_t const source_row = expanded_source_row % num_rows;
6866
6967 auto const * source_row_ptr =
@@ -160,7 +158,6 @@ __global__ void finalizeMoeRoutingKernel(
160158 elem_index += stride) {
161159 ComputeElem thread_output;
162160 thread_output.fill (0 );
163- float row_rescale{0 .f };
164161 for (int k_idx = 0 ; k_idx < k; ++k_idx) {
165162 int64_t const expanded_original_row = original_row + k_idx * num_rows;
166163 int64_t const expanded_permuted_row =
@@ -177,8 +174,6 @@ __global__ void finalizeMoeRoutingKernel(
177174 auto const * expanded_permuted_rows_row_ptr =
178175 expanded_permuted_rows_v + expanded_permuted_row * num_elems_in_col;
179176
180- int64_t const expert_idx = expert_for_source_row[k_offset];
181-
182177 ComputeElem expert_result = arrayConvert<InputElem, ComputeElem>(
183178 expanded_permuted_rows_row_ptr[elem_index]);
184179 thread_output = thread_output + row_scale * (expert_result);
0 commit comments