
Commit 8c0d15d

[Misc][Easy] Annotate unused vars in the csrc files (#14798)
Signed-off-by: Lu Fang <[email protected]>
1 parent 97ac781 commit 8c0d15d

File tree

4 files changed: +14 -13 lines changed

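For context, [[maybe_unused]] is the standard C++17 attribute that marks a declaration as one that may legitimately go unread, so diagnostics such as GCC/Clang's -Wunused-variable (and the corresponding nvcc/hipcc warnings) are suppressed without deleting the code. A minimal host-side sketch, using an illustrative function and macro that are not taken from the vLLM sources:

// Illustrative only (not from this commit): [[maybe_unused]] keeps a
// declaration whose value is read only in some build configurations,
// while silencing unused-variable warnings in the others.
// Compile with e.g.:  g++ -std=c++17 -Wall -Wextra -c example.cpp
#include <algorithm>

int compute_end(int offset_n, int block_size, int size_n) {
  [[maybe_unused]] int end_n = std::min(offset_n + block_size * 4, size_n);
#ifdef USE_END_N
  return end_n;   // only this configuration actually reads end_n
#else
  return size_n;  // end_n is dead here, but no warning is emitted
#endif
}

The same annotation works on device-side locals and shared-memory declarations, which is what the diffs below apply.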

csrc/prepare_inputs/advance_step.cu

Lines changed: 1 addition & 1 deletion
@@ -274,7 +274,7 @@ void advance_step_flashinfer(
   cudaDeviceGetAttribute(&blocks, cudaDevAttrMultiProcessorCount, dev);
   cudaDeviceGetAttribute(&threads, cudaDevAttrMaxThreadsPerBlock, dev);

-  int block_tables_stride = block_tables.stride(0);
+  [[maybe_unused]] int block_tables_stride = block_tables.stride(0);
   TORCH_CHECK((blocks * threads > num_queries),
               "multi-step: not enough threads to map to num_queries = ",
               num_queries, " block_tables.stride(0) = ", block_tables.stride(0),
csrc/quantization/fp8/amd/quant_utils.cuh

Lines changed: 1 addition & 1 deletion
@@ -446,7 +446,7 @@ scaled_vec_conversion<uint16_t, uint8_t>(const uint8_t& a, float scale) {
 template <>
 __inline__ __device__ uint32_t
 scaled_vec_conversion<uint32_t, uint16_t>(const uint16_t& a, float scale) {
-  __half2_raw h2r =
+  [[maybe_unused]] __half2_raw h2r =
       __hip_cvt_fp8x2_to_halfraw2(a, fp8_type::__default_interpret);
   union {
     __half2_raw h2r;

csrc/quantization/gptq/q_gemm.cu

Lines changed: 8 additions & 8 deletions
@@ -206,8 +206,8 @@ __global__ void gemm_half_q_half_gptq_4bit_kernel(
   int offset_m = blockIdx.y * m_count;
   int offset_k = blockIdx.z * BLOCK_KN_SIZE;

-  int end_n = min(offset_n + BLOCK_KN_SIZE * 4, size_n);
-  int end_m = min(offset_m + m_count, size_m);
+  [[maybe_unused]] int end_n = min(offset_n + BLOCK_KN_SIZE * 4, size_n);
+  [[maybe_unused]] int end_m = min(offset_m + m_count, size_m);
   int end_k = min(offset_k + BLOCK_KN_SIZE, size_k);

   int n = offset_n + t * 4;
@@ -344,8 +344,8 @@ __global__ void gemm_half_q_half_gptq_2bit_kernel(
   int offset_m = blockIdx.y * m_count;
   int offset_k = blockIdx.z * BLOCK_KN_SIZE;

-  int end_n = min(offset_n + BLOCK_KN_SIZE * 4, size_n);
-  int end_m = min(offset_m + m_count, size_m);
+  [[maybe_unused]] int end_n = min(offset_n + BLOCK_KN_SIZE * 4, size_n);
+  [[maybe_unused]] int end_m = min(offset_m + m_count, size_m);
   int end_k = min(offset_k + BLOCK_KN_SIZE, size_k);

   int n = offset_n + t * 4;
@@ -465,8 +465,8 @@ __global__ void gemm_half_q_half_gptq_3bit_kernel(
   int offset_m = blockIdx.y * m_count;
   int offset_k = blockIdx.z * BLOCK_KN_SIZE;

-  int end_n = min(offset_n + BLOCK_KN_SIZE * 4, size_n);
-  int end_m = min(offset_m + m_count, size_m);
+  [[maybe_unused]] int end_n = min(offset_n + BLOCK_KN_SIZE * 4, size_n);
+  [[maybe_unused]] int end_m = min(offset_m + m_count, size_m);
   int end_k = min(offset_k + BLOCK_KN_SIZE, size_k);

   int n = offset_n + t * 4;
@@ -593,8 +593,8 @@ __global__ void gemm_half_q_half_gptq_8bit_kernel(
   int offset_m = blockIdx.y * m_count;
   int offset_k = blockIdx.z * BLOCK_KN_SIZE;

-  int end_n = min(offset_n + BLOCK_KN_SIZE * 4, size_n);
-  int end_m = min(offset_m + m_count, size_m);
+  [[maybe_unused]] int end_n = min(offset_n + BLOCK_KN_SIZE * 4, size_n);
+  [[maybe_unused]] int end_m = min(offset_m + m_count, size_m);
   int end_k = min(offset_k + BLOCK_KN_SIZE, size_k);

   int n = offset_n + t * 4;
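Worth noting about the pattern repeated across these four kernels: a commonly used alternative is to keep a plain declaration and discard the value with a void cast. A small hypothetical sketch contrasting the two approaches (the function name is made up, not part of q_gemm.cu):

#include <algorithm>

int end_bounds_demo(int offset_m, int m_count, int size_m) {
  // Attribute at the declaration, as done in this commit: the intent is
  // visible exactly where the variable is introduced.
  [[maybe_unused]] int end_m = std::min(offset_m + m_count, size_m);

  // Older idiom: declare normally, then discard the value with a void cast.
  int end_m_legacy = std::min(offset_m + m_count, size_m);
  (void)end_m_legacy;

  return size_m;
}

Annotating the declaration keeps the suppression local to the variable and avoids an extra statement per kernel variant.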

csrc/rocm/attention.cu

Lines changed: 4 additions & 3 deletions
@@ -308,8 +308,8 @@ __launch_bounds__(NUM_THREADS, 5) void paged_attention_ll4mi_QKV_mfma16_kernel(

   constexpr int GQA_RATIO4 = DIVIDE_ROUND_UP(GQA_RATIO, 4);

-  __shared__ float shared_qk_max[NWARPS][16 + 1];
-  __shared__ float shared_exp_sum[NWARPS][16 + 1];
+  [[maybe_unused]] __shared__ float shared_qk_max[NWARPS][16 + 1];
+  [[maybe_unused]] __shared__ float shared_exp_sum[NWARPS][16 + 1];
   // shared_logits is used for multiple purposes
   __shared__ _B16x4 shared_logits[NWARPS][4][16][4];

@@ -426,7 +426,8 @@ __launch_bounds__(NUM_THREADS, 5) void paged_attention_ll4mi_QKV_mfma16_kernel(
     const cache_t* k_ptr2 = k_ptr + kblock_number * kv_block_stride;
     const int klocal_token_idx =
         TOKENS_PER_WARP * warpid + token_depth * 16 + lane16id;
-    const int kglobal_token_idx = partition_start_token_idx + klocal_token_idx;
+    [[maybe_unused]] const int kglobal_token_idx =
+        partition_start_token_idx + klocal_token_idx;
     const int kphysical_block_offset = klocal_token_idx % BLOCK_SIZE;
     const cache_t* k_ptr3 = k_ptr2 + kphysical_block_offset * KX;

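The attention.cu hunks show the attribute applied to __shared__ arrays in device code. A hypothetical, self-contained kernel sketch (unrelated to paged attention, built with nvcc -std=c++17) where a shared buffer is referenced in only one template instantiation, so the other instantiation may otherwise trigger an unused-variable diagnostic depending on the compiler and flags:

// Illustrative only: [[maybe_unused]] on a __shared__ declaration that is
// touched in just one instantiation of the kernel template.
#include <cuda_runtime.h>

template <bool USE_SCRATCH>
__global__ void copy_demo(const float* in, float* out) {
  // Referenced only when USE_SCRATCH is true; the attribute keeps the
  // other instantiation warning-free without deleting the buffer.
  [[maybe_unused]] __shared__ float scratch[64];
  float v = in[threadIdx.x];
  if constexpr (USE_SCRATCH) {
    scratch[threadIdx.x] = v;
    __syncthreads();
    v = scratch[63 - threadIdx.x];  // reversed copy through shared memory
  }
  out[threadIdx.x] = v;
}

int main() {
  float *in = nullptr, *out = nullptr;
  cudaMalloc(&in, 64 * sizeof(float));
  cudaMalloc(&out, 64 * sizeof(float));
  copy_demo<true><<<1, 64>>>(in, out);   // uses the shared buffer
  copy_demo<false><<<1, 64>>>(in, out);  // never touches it
  cudaDeviceSynchronize();
  cudaFree(in);
  cudaFree(out);
  return 0;
}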