@@ -1040,9 +1040,9 @@ def capture_model(self, kv_caches: List[List[torch.Tensor]]) -> None:
                     self.parallel_config.pipeline_parallel_size):
                 for batch_size in reversed(batch_size_capture_list):
                     if self.attn_backend.get_name() == "flashinfer":
-                        indptr_buffer = indptr_buffer[:batch_size + 1]
-                        last_page_len_buffer = last_page_len_buffer[:
-                                                                    batch_size]
+                        _indptr_buffer = indptr_buffer[:batch_size + 1]
+                        _last_page_len_buffer = last_page_len_buffer[:
+                                                                     batch_size]
 
                         num_qo_heads = (
                             self.model_config.get_num_attention_heads(
@@ -1055,8 +1055,8 @@ def capture_model(self, kv_caches: List[List[torch.Tensor]]) -> None:
                             use_tensor_cores = False
                         decode_wrapper = \
                             CUDAGraphBatchDecodeWithPagedKVCacheWrapper(
-                            decode_workspace_buffer, indptr_buffer,
-                            indices_buffer, last_page_len_buffer, "NHD",
+                            decode_workspace_buffer, _indptr_buffer,
+                            indices_buffer, _last_page_len_buffer, "NHD",
                             use_tensor_cores)
                         kv_cache_dtype = get_kv_cache_torch_dtype(
                             self.kv_cache_dtype, self.model_config.dtype)
@@ -1131,10 +1131,10 @@ def capture_model(self, kv_caches: List[List[torch.Tensor]]) -> None:
                         self.model, self.attn_backend.get_name())
 
                     if self.attn_backend.get_name() == "flashinfer":
-                        graph_runner.flashinfer_indptr_buffer = indptr_buffer
+                        graph_runner.flashinfer_indptr_buffer = _indptr_buffer
                         graph_runner.flashinfer_indices_buffer = indices_buffer
                         graph_runner.flashinfer_last_page_len_buffer = \
-                            last_page_len_buffer
+                            _last_page_len_buffer
                         graph_runner.flashinfer_decode_workspace_buffer = \
                             decode_workspace_buffer
                         graph_runner.flashinfer_decode_wrapper = \
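
The change is a rename rather than a logic change: the per-batch-size slices are bound to new underscore-prefixed names (`_indptr_buffer`, `_last_page_len_buffer`) instead of rebinding the loop-carried FlashInfer buffers to their own slices, which kept the full-size buffers intact across iterations. A minimal sketch of the failure mode being avoided, assuming (as in capture_model) that batch sizes are captured largest-first and the buffers are reused across passes, e.g. one per pipeline-parallel virtual engine; the sizes and names below are illustrative, not the real vLLM buffers:

import torch

batch_sizes = [2, 4, 8]  # capture list; iterated largest-first below
indptr_buffer = torch.zeros(max(batch_sizes) + 1, dtype=torch.int32)

for virtual_engine in range(2):       # a second pass exposes the bug
    for batch_size in reversed(batch_sizes):
        # Buggy form: `indptr_buffer = indptr_buffer[:batch_size + 1]`
        # rebinds the name to an ever-smaller view, so by the second
        # pass the slice for batch_size == 8 would be taken from a
        # 3-element tensor and silently come back undersized.
        _indptr_buffer = indptr_buffer[:batch_size + 1]  # fixed: new name
        assert _indptr_buffer.numel() == batch_size + 1

Because tensor slicing clamps to the available length instead of raising, the buggy version would not fail at the slice itself; the undersized view would only surface later, when a captured CUDA graph replays against a buffer smaller than the batch size it was built for.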