Merged
37 commits
e92a8cf
[V0 Deprecation] Remove AsyncLLMEngine
WoosukKwon Sep 17, 2025
9a54905
Merge branch 'main' into woosuk/remove-async-llm-engine
WoosukKwon Sep 17, 2025
6c89e62
fix assert false
WoosukKwon Sep 17, 2025
63f124d
merge
WoosukKwon Sep 17, 2025
f63f899
[V0 Deprecation] Remove LLMEngine
WoosukKwon Sep 17, 2025
9186476
merge
WoosukKwon Sep 17, 2025
65b3990
merge
WoosukKwon Sep 17, 2025
51a326d
fix
WoosukKwon Sep 17, 2025
8c2eb56
revert
WoosukKwon Sep 17, 2025
7a92f17
fix test_chat
WoosukKwon Sep 17, 2025
d80a455
fix pp test
WoosukKwon Sep 17, 2025
9bb81fe
fix
WoosukKwon Sep 17, 2025
c855f92
rm more tests
WoosukKwon Sep 18, 2025
c12bc3e
fix
WoosukKwon Sep 18, 2025
3d7c361
fix
WoosukKwon Sep 18, 2025
c17fb8f
[V0 Deprecation] Remove more V0 tests
WoosukKwon Sep 18, 2025
9011ad2
minor
WoosukKwon Sep 18, 2025
2d60e15
fix
WoosukKwon Sep 18, 2025
7e3535c
Merge branch 'main' into woosuk/rm-more-v0-tests
WoosukKwon Sep 18, 2025
4e42d0c
Merge branch 'main' into woosuk/rm-more-v0-tests
WoosukKwon Sep 18, 2025
9df17d4
Merge branch 'woosuk/rm-more-v0-tests' into woosuk/remove-async-llm-e…
WoosukKwon Sep 18, 2025
4de8eda
update
WoosukKwon Sep 18, 2025
679bf7b
Merge branch 'main' into woosuk/remove-async-llm-engine
WoosukKwon Sep 18, 2025
d2cd2a4
merge
WoosukKwon Sep 18, 2025
c846648
merge
WoosukKwon Sep 18, 2025
4d356ef
rm v0 tests
WoosukKwon Sep 18, 2025
91dd5db
Merge branch 'main' into woosuk/remove-llm-engine
WoosukKwon Sep 19, 2025
277ef29
rm
WoosukKwon Sep 19, 2025
c05504e
minor
WoosukKwon Sep 19, 2025
5fe855d
merge
WoosukKwon Sep 20, 2025
60b94e6
Remove codeowners
WoosukKwon Sep 20, 2025
35c121f
fix
WoosukKwon Sep 20, 2025
becf74c
fix
WoosukKwon Sep 20, 2025
2104774
fix
WoosukKwon Sep 20, 2025
a17686b
fix
WoosukKwon Sep 20, 2025
f2b7215
rm fp8 kv cache fallback & profiling
WoosukKwon Sep 21, 2025
dddbd5e
skip
WoosukKwon Sep 21, 2025
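Taken together, these commits delete the V0 `AsyncLLMEngine` and `LLMEngine` classes along with their tests, leaving the V1 engine as the only code path. As a hedged sketch (not code from this PR; the module path and signatures reflect vLLM's V1 engine around the time of these commits and may differ in other versions), async callers that previously constructed an `AsyncLLMEngine` would instead go through the V1 `AsyncLLM`, which implements the same client interface used by `vllm serve`:

```python
# Hypothetical migration sketch, not part of this diff. Assumes the V1
# AsyncLLM entrypoint (vllm/v1/engine/async_llm.py) as it existed around
# the time this PR merged; names may differ in your vLLM version.
import asyncio

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.sampling_params import SamplingParams
from vllm.v1.engine.async_llm import AsyncLLM


async def main() -> None:
    # Before this PR: engine = AsyncLLMEngine.from_engine_args(args)
    engine = AsyncLLM.from_engine_args(
        AsyncEngineArgs(model="facebook/opt-125m"))
    try:
        # generate() yields incremental RequestOutputs, as the V0 API did.
        async for output in engine.generate("Hello, my name is",
                                            SamplingParams(max_tokens=16),
                                            request_id="req-0"):
            final = output
        print(final.outputs[0].text)
    finally:
        engine.shutdown()


asyncio.run(main())
```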
2 changes: 0 additions & 2 deletions .buildkite/test-pipeline.yaml
@@ -46,7 +46,6 @@ steps:
 mirror_hardwares: [amdexperimental]
 source_file_dependencies:
 - vllm/
-- tests/async_engine
 - tests/test_inputs.py
 - tests/test_outputs.py
 - tests/multimodal
@@ -56,7 +55,6 @@ steps:
 - tests/transformers_utils
 commands:
 - python3 standalone_tests/lazy_imports.py
-- pytest -v -s async_engine # AsyncLLMEngine
 - pytest -v -s test_inputs.py
 - pytest -v -s test_outputs.py
 - pytest -v -s multimodal
Empty file removed tests/async_engine/__init__.py
54 changes: 0 additions & 54 deletions tests/async_engine/api_server_async_engine.py

This file was deleted.

12 changes: 0 additions & 12 deletions tests/async_engine/conftest.py

This file was deleted.

139 changes: 0 additions & 139 deletions tests/async_engine/test_api_server.py

This file was deleted.

71 changes: 0 additions & 71 deletions tests/async_engine/test_request_tracker.py

This file was deleted.

36 changes: 0 additions & 36 deletions tests/core/test_chunked_prefill_scheduler.py
@@ -3,13 +3,8 @@

 from unittest.mock import MagicMock
 
-import pytest # noqa
-
 from vllm.config import CacheConfig, SchedulerConfig
 from vllm.core.scheduler import Scheduler
-from vllm.engine.arg_utils import EngineArgs
-from vllm.engine.llm_engine import LLMEngine
-from vllm.sampling_params import SamplingParams
 from vllm.sequence import Logprob, SequenceGroup
 
 from .utils import create_dummy_prompt
@@ -825,34 +820,3 @@ def test_prefix_caching_with_concurrent_partial_prefills():
     assert seq_group_meta[1].token_chunk_size == 22
     assert out.num_prefill_groups == 2
     assert out.num_batched_tokens == 44
-
-
-@pytest.mark.parametrize("model", ["facebook/opt-125m"])
-@pytest.mark.parametrize("max_num_partial_prefills", [2, 4, 8])
-def test_chunked_prefill_with_actual_engine(model: str,
-                                            max_num_partial_prefills: int):
-    """Make sure the model can actually sample with concurrent
-    partial prefills
-    """
-
-    prompt = "hello" * 40
-
-    engine_args = EngineArgs(
-        model=model,
-        max_num_partial_prefills=max_num_partial_prefills,
-        max_num_batched_tokens=40,
-        max_num_seqs=8,
-        enable_chunked_prefill=True,
-        gpu_memory_utilization=0.8,
-    )
-
-    engine = LLMEngine.from_engine_args(engine_args)
-    sampling_params = SamplingParams(temperature=0)
-
-    for req_num in range(max_num_partial_prefills):
-        engine.add_request(f"{req_num}", prompt, sampling_params)
-    # first step
-    request_outputs = engine.step()
-    # means all are prefilling
-    assert len(request_outputs) == 0
-    assert len(engine.scheduler[0].running) == max_num_partial_prefills
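The deleted test drove concurrent partial prefills through the removed V0 `LLMEngine.step()` loop and inspected scheduler internals, which have no direct V1 equivalent. A rough end-to-end sketch of exercising the same chunked-prefill settings through the public `LLM` API (a hypothetical stand-in, not code from this PR):

```python
# Hypothetical replacement sketch, not part of this diff: the deleted test's
# chunked-prefill configuration driven end to end through the public LLM
# entrypoint instead of LLMEngine.step()/scheduler introspection.
from vllm import LLM, SamplingParams

llm = LLM(
    model="facebook/opt-125m",
    max_num_batched_tokens=40,  # small budget so prompts prefill in chunks
    max_num_seqs=8,
    enable_chunked_prefill=True,
    gpu_memory_utilization=0.8,
)
prompts = ["hello" * 40] * 4  # each prompt exceeds the per-step token budget
outputs = llm.generate(prompts, SamplingParams(temperature=0))
assert len(outputs) == 4  # every request completes despite chunked prefills
```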