Merged
37 commits
e92a8cf
[V0 Deprecation] Remove AsyncLLMEngine
WoosukKwon Sep 17, 2025
9a54905
Merge branch 'main' into woosuk/remove-async-llm-engine
WoosukKwon Sep 17, 2025
6c89e62
fix assert false
WoosukKwon Sep 17, 2025
63f124d
merge
WoosukKwon Sep 17, 2025
f63f899
[V0 Deprecation] Remove LLMEngine
WoosukKwon Sep 17, 2025
9186476
merge
WoosukKwon Sep 17, 2025
65b3990
merge
WoosukKwon Sep 17, 2025
51a326d
fix
WoosukKwon Sep 17, 2025
8c2eb56
revert
WoosukKwon Sep 17, 2025
7a92f17
fix test_chat
WoosukKwon Sep 17, 2025
d80a455
fix pp test
WoosukKwon Sep 17, 2025
9bb81fe
fix
WoosukKwon Sep 17, 2025
c855f92
rm more tests
WoosukKwon Sep 18, 2025
c12bc3e
fix
WoosukKwon Sep 18, 2025
3d7c361
fix
WoosukKwon Sep 18, 2025
c17fb8f
[V0 Deprecation] Remove more V0 tests
WoosukKwon Sep 18, 2025
9011ad2
minor
WoosukKwon Sep 18, 2025
2d60e15
fix
WoosukKwon Sep 18, 2025
7e3535c
Merge branch 'main' into woosuk/rm-more-v0-tests
WoosukKwon Sep 18, 2025
4e42d0c
Merge branch 'main' into woosuk/rm-more-v0-tests
WoosukKwon Sep 18, 2025
9df17d4
Merge branch 'woosuk/rm-more-v0-tests' into woosuk/remove-async-llm-e…
WoosukKwon Sep 18, 2025
4de8eda
update
WoosukKwon Sep 18, 2025
679bf7b
Merge branch 'main' into woosuk/remove-async-llm-engine
WoosukKwon Sep 18, 2025
d2cd2a4
merge
WoosukKwon Sep 18, 2025
c846648
merge
WoosukKwon Sep 18, 2025
4d356ef
rm v0 tests
WoosukKwon Sep 18, 2025
91dd5db
Merge branch 'main' into woosuk/remove-llm-engine
WoosukKwon Sep 19, 2025
277ef29
rm
WoosukKwon Sep 19, 2025
c05504e
minor
WoosukKwon Sep 19, 2025
5fe855d
merge
WoosukKwon Sep 20, 2025
60b94e6
Remove codeowners
WoosukKwon Sep 20, 2025
35c121f
fix
WoosukKwon Sep 20, 2025
becf74c
fix
WoosukKwon Sep 20, 2025
2104774
fix
WoosukKwon Sep 20, 2025
a17686b
fix
WoosukKwon Sep 20, 2025
f2b7215
rm fp8 kv cache fallback & profiling
WoosukKwon Sep 21, 2025
dddbd5e
skip
WoosukKwon Sep 21, 2025
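Taken together, these commits delete the V0 `AsyncLLMEngine` and `LLMEngine` classes along with their tests, leaving the V1 engine as the only code path. As a hedged sketch (not code from this PR; the module path and signatures reflect vLLM's V1 engine around the time of these commits and may differ in other versions), async callers that previously constructed an `AsyncLLMEngine` would instead go through the V1 `AsyncLLM`, which implements the same client interface used by `vllm serve`:

```python
# Hypothetical migration sketch, not part of this diff. Assumes the V1
# AsyncLLM entrypoint (vllm/v1/engine/async_llm.py) as it existed around
# the time this PR merged; names may differ in your vLLM version.
import asyncio

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.sampling_params import SamplingParams
from vllm.v1.engine.async_llm import AsyncLLM


async def main() -> None:
    # Before this PR: engine = AsyncLLMEngine.from_engine_args(args)
    engine = AsyncLLM.from_engine_args(
        AsyncEngineArgs(model="facebook/opt-125m"))
    try:
        # generate() yields incremental RequestOutputs, as the V0 API did.
        async for output in engine.generate("Hello, my name is",
                                            SamplingParams(max_tokens=16),
                                            request_id="req-0"):
            final = output
        print(final.outputs[0].text)
    finally:
        engine.shutdown()


asyncio.run(main())
```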
2 changes: 0 additions & 2 deletions .buildkite/test-pipeline.yaml
@@ -46,7 +46,6 @@ steps:
 mirror_hardwares: [amdexperimental]
 source_file_dependencies:
 - vllm/
-- tests/async_engine
 - tests/test_inputs.py
 - tests/test_outputs.py
 - tests/multimodal
@@ -56,7 +55,6 @@ steps:
 - tests/transformers_utils
 commands:
 - python3 standalone_tests/lazy_imports.py
-- pytest -v -s async_engine # AsyncLLMEngine
 - pytest -v -s test_inputs.py
 - pytest -v -s test_outputs.py
 - pytest -v -s multimodal
Empty file removed tests/async_engine/__init__.py
54 changes: 0 additions & 54 deletions tests/async_engine/api_server_async_engine.py

This file was deleted.

12 changes: 0 additions & 12 deletions tests/async_engine/conftest.py

This file was deleted.

139 changes: 0 additions & 139 deletions tests/async_engine/test_api_server.py

This file was deleted.

71 changes: 0 additions & 71 deletions tests/async_engine/test_request_tracker.py

This file was deleted.

36 changes: 0 additions & 36 deletions tests/core/test_chunked_prefill_scheduler.py
@@ -3,13 +3,8 @@

 from unittest.mock import MagicMock
 
-import pytest # noqa
-
 from vllm.config import CacheConfig, SchedulerConfig
 from vllm.core.scheduler import Scheduler
-from vllm.engine.arg_utils import EngineArgs
-from vllm.engine.llm_engine import LLMEngine
-from vllm.sampling_params import SamplingParams
 from vllm.sequence import Logprob, SequenceGroup
 
 from .utils import create_dummy_prompt
@@ -825,34 +820,3 @@ def test_prefix_caching_with_concurrent_partial_prefills():
     assert seq_group_meta[1].token_chunk_size == 22
     assert out.num_prefill_groups == 2
     assert out.num_batched_tokens == 44
-
-
-@pytest.mark.parametrize("model", ["facebook/opt-125m"])
-@pytest.mark.parametrize("max_num_partial_prefills", [2, 4, 8])
-def test_chunked_prefill_with_actual_engine(model: str,
-                                            max_num_partial_prefills: int):
-    """Make sure the model can actually sample with concurrent
-    partial prefills
-    """
-
-    prompt = "hello" * 40
-
-    engine_args = EngineArgs(
-        model=model,
-        max_num_partial_prefills=max_num_partial_prefills,
-        max_num_batched_tokens=40,
-        max_num_seqs=8,
-        enable_chunked_prefill=True,
-        gpu_memory_utilization=0.8,
-    )
-
-    engine = LLMEngine.from_engine_args(engine_args)
-    sampling_params = SamplingParams(temperature=0)
-
-    for req_num in range(max_num_partial_prefills):
-        engine.add_request(f"{req_num}", prompt, sampling_params)
-    # first step
-    request_outputs = engine.step()
-    # means all are prefilling
-    assert len(request_outputs) == 0
-    assert len(engine.scheduler[0].running) == max_num_partial_prefills
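The deleted test drove concurrent partial prefills through the removed V0 `LLMEngine.step()` loop and inspected scheduler internals, which have no direct V1 equivalent. A rough end-to-end sketch of exercising the same chunked-prefill settings through the public `LLM` API (a hypothetical stand-in, not code from this PR):

```python
# Hypothetical replacement sketch, not part of this diff: the deleted test's
# chunked-prefill configuration driven end to end through the public LLM
# entrypoint instead of LLMEngine.step()/scheduler introspection.
from vllm import LLM, SamplingParams

llm = LLM(
    model="facebook/opt-125m",
    max_num_batched_tokens=40,  # small budget so prompts prefill in chunks
    max_num_seqs=8,
    enable_chunked_prefill=True,
    gpu_memory_utilization=0.8,
)
prompts = ["hello" * 40] * 4  # each prompt exceeds the per-step token budget
outputs = llm.generate(prompts, SamplingParams(temperature=0))
assert len(outputs) == 4  # every request completes despite chunked prefills
```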