diff --git a/vllm/attention/backends/flash_attn.py b/vllm/attention/backends/flash_attn.py index 26b3159682b3..027fb6d42891 100644 --- a/vllm/attention/backends/flash_attn.py +++ b/vllm/attention/backends/flash_attn.py @@ -260,6 +260,8 @@ def _add_seq_group( elif ((chunked_prefill_enabled or not is_prompt) and block_tables is not None): block_table = block_tables[seq_id][-curr_sliding_window_block:] + elif block_tables is not None: + block_table = block_tables[seq_id] self.block_tables.append(block_table) # Compute slot mapping.