Skip to content

Commit a3b7103

Browse files
committed
Add clarifying comment on modulo optimization
1 parent e638e65 commit a3b7103

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

cuda/src/poly/eval_at_point.cu

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -175,8 +175,9 @@ __global__ void eval_many_at_point_first_pass(m31 **g_coeffs, qm31 *temp, qm31 *
175 175

176 176
int poly_index = blockIdx.x / blocks_in_poly;
177 177

178-
s_coeffs[idx] = g_coeffs[poly_index][(blockIdx.x % blocks_in_poly) * coeffs_size + idx];
179-
s_coeffs[idx + blockDim.x] = g_coeffs[poly_index][(blockIdx.x % blocks_in_poly) * coeffs_size + idx + blockDim.x];
178+
// A % X == A & (X - 1) for non-negative A when X is a power of two; this relies on blocks_in_poly being a power of two
179+
s_coeffs[idx] = g_coeffs[poly_index][(blockIdx.x & (blocks_in_poly - 1)) * coeffs_size + idx];
180+
s_coeffs[idx + blockDim.x] = g_coeffs[poly_index][(blockIdx.x & (blocks_in_poly - 1)) * coeffs_size + idx + blockDim.x];
180 181
__syncthreads();
181 182

182 183
int level_size = coeffs_size >> 1;

0 commit comments

Comments
 (0)