diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py index 41d9f1b65c23..33cc33beb2ae 100644 --- a/vllm/v1/core/kv_cache_manager.py +++ b/vllm/v1/core/kv_cache_manager.py @@ -291,6 +291,7 @@ def allocate_slots( # cache a block after is has finished recving. self.num_cached_block[request.request_id] = len( new_computed_blocks) + return new_blocks self.cache_blocks( request=request,