From 15c3114465d94e3988f6a59f1cd5f760f0313f64 Mon Sep 17 00:00:00 2001 From: Ray Wan Date: Mon, 5 Aug 2024 04:40:22 +0000 Subject: [PATCH 1/2] added potential fix --- vllm/attention/backends/flash_attn.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm/attention/backends/flash_attn.py b/vllm/attention/backends/flash_attn.py index 26b3159682b3..5b2e18f1fa68 100644 --- a/vllm/attention/backends/flash_attn.py +++ b/vllm/attention/backends/flash_attn.py @@ -260,6 +260,8 @@ def _add_seq_group( elif ((chunked_prefill_enabled or not is_prompt) and block_tables is not None): block_table = block_tables[seq_id][-curr_sliding_window_block:] + elif block_table is not None: + block_table = block_tables[seq_id] self.block_tables.append(block_table) # Compute slot mapping. From ee1bb561c69c4cdf4b7adf1d59d2885687e1fadf Mon Sep 17 00:00:00 2001 From: Ray Wan <112235519+raywanb@users.noreply.github.com> Date: Tue, 6 Aug 2024 17:14:05 +0800 Subject: [PATCH 2/2] fix typo Co-authored-by: Woosuk Kwon --- vllm/attention/backends/flash_attn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/attention/backends/flash_attn.py b/vllm/attention/backends/flash_attn.py index 5b2e18f1fa68..027fb6d42891 100644 --- a/vllm/attention/backends/flash_attn.py +++ b/vllm/attention/backends/flash_attn.py @@ -260,7 +260,7 @@ def _add_seq_group( elif ((chunked_prefill_enabled or not is_prompt) and block_tables is not None): block_table = block_tables[seq_id][-curr_sliding_window_block:] - elif block_table is not None: + elif block_tables is not None: block_table = block_tables[seq_id] self.block_tables.append(block_table)