Merged (42 commits)
5447dc5  Replace the block_pool list with the vLLM Block Pool (maxdebayser, Nov 27, 2025)
a43e072  manage padding blocks outside of block pool (maxdebayser, Nov 28, 2025)
6378294  Switch to Single Type KV Cache manager (maxdebayser, Nov 30, 2025)
5f188ac  Add prefix caching (maxdebayser, Nov 30, 2025)
ed7441f  Fix small errors (maxdebayser, Nov 30, 2025)
6983f2a  Fix prefix caching path (maxdebayser, Dec 1, 2025)
f8dd1d2  fix linting problem (maxdebayser, Dec 1, 2025)
78a8b84  Merge branch 'integrate_block_pool' into sched_agnostic_pc (maxdebayser, Dec 1, 2025)
0296747  address review comments (maxdebayser, Dec 1, 2025)
8420160  Merge branch 'main' into integrate_block_pool (maxdebayser, Dec 2, 2025)
fbaf933  Merge branch 'integrate_block_pool' into sched_agnostic_pc (maxdebayser, Dec 2, 2025)
ab2f4d3  fix mispelling (maxdebayser, Dec 2, 2025)
a514310  address some review comments (maxdebayser, Dec 2, 2025)
ec9b1f6  tmp hack: run tests on this branch (yannicks1, Dec 3, 2025)
76059a4  fix: tmp hack to run tests (yannicks1, Dec 3, 2025)
41d3ea9  fix bug when no cache is found (maxdebayser, Dec 3, 2025)
dc92a9b  add first unit test prefix caching (yannicks1, Dec 3, 2025)
041b3fa  disable prefix caching by default and enable tests (maxdebayser, Dec 4, 2025)
a491e7d  Merge branch 'main' into integrate_block_pool (maxdebayser, Dec 4, 2025)
f71df91  address review comments (maxdebayser, Dec 4, 2025)
e14acff  Merge branch 'integrate_block_pool' into sched_agnostic_pc (maxdebayser, Dec 4, 2025)
4808379  reduce test repetition (maxdebayser, Dec 4, 2025)
f201675  revert bad change (maxdebayser, Dec 4, 2025)
4d4a228  Merge branch 'integrate_block_pool' into sched_agnostic_pc (maxdebayser, Dec 4, 2025)
a11a9b5  add test: prefix hit of a seq not part of the batch. (yannicks1, Dec 4, 2025)
fa51002  reset prefixes across tests in cached engine (yannicks1, Dec 4, 2025)
78c9141  Merge branch 'main' into sched_agnostic_pc (maxdebayser, Dec 4, 2025)
5c45428  adding tests: limit number of blocks (yannicks1, Dec 4, 2025)
8c96eec  Merge branch 'main' into sched_agnostic_pc (yannicks1, Dec 4, 2025)
4823a7f  Merge branch 'main' into sched_agnostic_pc (yannicks1, Dec 8, 2025)
fe8d383  Fix test (maxdebayser, Dec 8, 2025)
a1d140b  Merge branch 'sched_agnostic_pc' of github.com:vllm-project/vllm-spyr… (maxdebayser, Dec 8, 2025)
882a530  revert tmp hack (yannicks1, Dec 8, 2025)
cbc2b35  fix isort (yannicks1, Dec 8, 2025)
ddbbeec  add more tests and fix a small bug in the model runner (maxdebayser, Dec 8, 2025)
9559d46  appease linter (maxdebayser, Dec 8, 2025)
9247e92  Merge branch 'sched_agnostic_pc' of github.com:vllm-project/vllm-spyr… (maxdebayser, Dec 8, 2025)
fd7cc90  update hf_cache (maxdebayser, Dec 8, 2025)
2fa71e5  address review comments (maxdebayser, Dec 9, 2025)
5495953  address review comments (maxdebayser, Dec 9, 2025)
d9c4d4a  improve comment (maxdebayser, Dec 9, 2025)
f0de597  Merge branch 'main' into sched_agnostic_pc (maxdebayser, Dec 9, 2025)
12 changes: 9 additions & 3 deletions tests/conftest.py
@@ -104,11 +104,13 @@ def pytest_generate_tests(metafunc):
# markers
if ("mode" in metafunc.fixturenames and "cb" not in existing_markers
and "chunked_prefill" not in existing_markers
and "cp" not in existing_markers and "pc" not in existing_markers
and "mode" not in existing_markers):
metafunc.parametrize("mode", [
"sb",
pytest.param("cb", marks=pytest.mark.cb, id="cb"),
pytest.param("cp", marks=pytest.mark.chunked_prefill, id="cp")
pytest.param("cp", marks=pytest.mark.chunked_prefill, id="cp"),
pytest.param("pc", marks=pytest.mark.prefix_caching, id="pc")
])


@@ -252,7 +254,7 @@ def remote_openai_server(request):
skip_unsupported_tp_size(int(tp_size), backend)
server_args.extend(["--tensor-parallel-size", str(tp_size)])

if "mode" in params and params["mode"] in ["cb", "cp"]:
if "mode" in params and params["mode"] in ["cb", "cp", "pc"]:
max_model_len = params["max_model_len"]
max_num_seqs = params["max_num_seqs"]
env_dict = {
@@ -265,12 +267,16 @@
str(max_model_len)
])
# Chunked prefill extra
if params["mode"] == "cp":
if params["mode"] in ["cp", "pc"]:
env_dict.update({"VLLM_SPYRE_USE_CHUNKED_PREFILL": "1"})
server_args.extend([
"--max_num_batched_tokens",
str(128),
])
if params["mode"] == "pc":
server_args.extend([
"--enable-prefix-caching",
])

else:
warmup_shapes = params['warmup_shapes']
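For context, here is a minimal self-contained sketch (not part of the diff) of the extra server setup the updated fixture applies for the "cp" and "pc" modes: the new "pc" mode inherits the chunked-prefill settings and additionally enables prefix caching. The initial contents of `env_dict` and `server_args` are collapsed in the diff and are therefore left empty here.

```python
# Sketch only, mirroring the fixture logic above; not the fixture itself.
mode = "pc"  # or "cp"
env_dict: dict[str, str] = {}   # other entries are collapsed in the diff
server_args: list[str] = []     # model/length/seq arguments are collapsed in the diff

if mode in ["cp", "pc"]:
    # Prefix caching runs on top of chunked prefill in these tests.
    env_dict.update({"VLLM_SPYRE_USE_CHUNKED_PREFILL": "1"})
    server_args.extend(["--max_num_batched_tokens", str(128)])
if mode == "pc":
    server_args.extend(["--enable-prefix-caching"])

print(env_dict, server_args)
```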
3 changes: 3 additions & 0 deletions tests/e2e/test_logits_processors.py
@@ -3,6 +3,7 @@
import pytest
import torch
from llm_cache import patch_environment
from llm_cache_util import force_engine_shutdown
from spyre_util import ModelInfo
from vllm import LLM, SamplingParams
from vllm.config import VllmConfig
@@ -57,6 +58,7 @@ def apply(self, logits: torch.Tensor) -> torch.Tensor:
params = SamplingParams(max_tokens=5, temperature=0, logprobs=0)

spyre_model.generate(prompt, params)
force_engine_shutdown(spyre_model)

assert has_invoked_logits_processor

@@ -154,6 +156,7 @@ def apply(self, logits: torch.Tensor) -> torch.Tensor:
spy_outputs = {}
params = [params0, params1, params2]
outputs = spyre_model.generate(prompt, params)
force_engine_shutdown(spyre_model)

assert spy_outputs[5] == outputs[0].outputs[0].token_ids
assert spy_outputs[10] == outputs[1].outputs[0].token_ids
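The only functional change in this file is the added `force_engine_shutdown(spyre_model)` call after generation. A rough sketch of the pattern follows, assuming a standalone `vllm.LLM` instance with a placeholder model path; in the tests, `spyre_model` instead comes from the cached-engine fixtures.

```python
from llm_cache_util import force_engine_shutdown  # test helper in this repo
from vllm import LLM, SamplingParams

llm = LLM(model="/path/to/model")  # placeholder; the tests use the cached Spyre model
params = SamplingParams(max_tokens=5, temperature=0, logprobs=0)
outputs = llm.generate("Hello", params)
force_engine_shutdown(llm)  # tear the engine down once the outputs are collected
```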
7 changes: 7 additions & 0 deletions tests/e2e/test_sampling_params.py
@@ -7,6 +7,10 @@

pytestmark = [pytest.mark.full_model, pytest.mark.other_e2e]

sb_mark = pytest.param("sb", marks=pytest.mark.sb, id="sb")
cb_mark = pytest.param("cb", marks=pytest.mark.cb, id="cb")
cp_mark = pytest.param("cp", marks=pytest.mark.chunked_prefill, id="cp")


def test_spyre_batch1_temperature(model: ModelInfo, backend, monkeypatch,
use_llm_cache, warmup_shapes):
@@ -212,6 +216,7 @@ def test_spyre_batch1_top_k(model: ModelInfo, backend, monkeypatch,
assert token_div1 < token_div2


@pytest.mark.parametrize("mode", [sb_mark, cb_mark, cp_mark])
def test_spyre_batch1_logit_bias(model: ModelInfo, backend, monkeypatch,
use_llm_cache, warmup_shapes, max_model_len,
max_num_seqs, mode: str):
@@ -253,6 +258,7 @@ def test_spyre_batch1_logit_bias(model: ModelInfo, backend, monkeypatch,
assert output[0].outputs[0].text != output[1].outputs[0].text


@pytest.mark.parametrize("mode", [sb_mark, cb_mark, cp_mark])
def test_spyre_batch1_min_tokens(model: ModelInfo, backend, monkeypatch,
use_llm_cache, max_model_len, max_num_seqs,
warmup_shapes, mode: str):
@@ -322,6 +328,7 @@ def test_spyre_batch1_ignore_eos(model: ModelInfo, backend, monkeypatch,
assert output2.outputs[0].finish_reason != 'length'


@pytest.mark.parametrize("mode", [sb_mark, cb_mark, cp_mark])
def test_spyre_batch1_min_p(model: ModelInfo, backend, monkeypatch,
use_llm_cache, max_model_len, max_num_seqs,
warmup_shapes, mode: str):
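The `sb_mark`/`cb_mark`/`cp_mark` parameters added at the top of this file make each decorated test run once per execution mode while keeping the cases selectable by marker (for example, `pytest -m chunked_prefill` runs only the cp cases). A small self-contained sketch of the mechanism, with a placeholder test body:

```python
import pytest

sb_mark = pytest.param("sb", marks=pytest.mark.sb, id="sb")
cb_mark = pytest.param("cb", marks=pytest.mark.cb, id="cb")
cp_mark = pytest.param("cp", marks=pytest.mark.chunked_prefill, id="cp")


@pytest.mark.parametrize("mode", [sb_mark, cb_mark, cp_mark])
def test_mode_smoke(mode: str):
    # Placeholder body; the real tests use `mode` to pick static batching,
    # continuous batching, or chunked prefill in the engine/server fixtures.
    assert mode in ("sb", "cb", "cp")
```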
2 changes: 1 addition & 1 deletion tests/e2e/test_spyre_basic.py
@@ -175,7 +175,7 @@ def test_max_model_len_override(model: ModelInfo, backend, warmup_shapes,
"use_cb": True,
"warmup_shapes": None,
"use_chunked_prefill": mode == "cp",
} if mode in ["cb", "cp"] else {
} if mode in ["cb", "cp", "pc"] else {
"use_cb": False,
"warmup_shapes": warmup_shapes,
})
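For reference, a self-contained sketch (not part of the diff) of the engine kwargs that the conditional expression above produces for each mode, with `warmup_shapes` standing in for the fixture value of the same name; the new "pc" mode follows the cb/cp branch.

```python
# Sketch mirroring the conditional in test_max_model_len_override above.
def engine_kwargs(mode: str, warmup_shapes):
    return {
        "use_cb": True,
        "warmup_shapes": None,
        "use_chunked_prefill": mode == "cp",
    } if mode in ["cb", "cp", "pc"] else {
        "use_cb": False,
        "warmup_shapes": warmup_shapes,
    }


assert engine_kwargs("pc", None)["use_cb"] is True
assert engine_kwargs("sb", [(64, 20, 4)]) == {
    "use_cb": False,
    "warmup_shapes": [(64, 20, 4)],
}
```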
5 changes: 5 additions & 0 deletions tests/e2e/test_spyre_max_new_tokens.py
@@ -8,8 +8,13 @@
from spyre_util import DecodeWarmupShapes, ModelInfo, get_chicken_soup_prompts
from vllm import SamplingParams

sb_mark = pytest.param("sb", marks=pytest.mark.sb, id="sb")
cb_mark = pytest.param("cb", marks=pytest.mark.cb, id="cb")
cp_mark = pytest.param("cp", marks=pytest.mark.chunked_prefill, id="cp")


@pytest.mark.parametrize("stop_last", [True, False])
@pytest.mark.parametrize("mode", [sb_mark, cb_mark, cp_mark])
def test_output(model: ModelInfo, stop_last: bool, max_model_len: int,
max_num_seqs: int, warmup_shapes: DecodeWarmupShapes,
backend: str, mode: str, monkeypatch: pytest.MonkeyPatch,