Skip to content

Commit 8e6b65b

Browse files
WoosukKwon authored and charlifu committed
[V0 Deprecation] Remove LLMEngine (vllm-project#25033)
Signed-off-by: Woosuk Kwon <[email protected]>
Signed-off-by: Woosuk Kwon <[email protected]>
Signed-off-by: charlifu <[email protected]>
1 parent ef3794d commit 8e6b65b

File tree

29 files changed

+66
-2764
lines changed

29 files changed

+66
-2764
lines changed

.buildkite/scripts/hardware_ci/run-amd-test.sh

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,6 @@ if [[ $commands == *"pytest -v -s models/test_registry.py"* ]]; then
8686
commands=${commands//"pytest -v -s models/test_registry.py"/"pytest -v -s models/test_registry.py -k 'not BambaForCausalLM and not GritLM and not Mamba2ForCausalLM and not Zamba2ForCausalLM'"}
8787
fi
8888

89-
if [[ $commands == *"VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2'"* ]]; then
90-
commands=${commands//"VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2'"/"VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2 and not BambaForCausalLM and not Gemma2ForCausalLM and not Grok1ModelForCausalLM and not Zamba2ForCausalLM and not Gemma2Model and not GritLM'"}
91-
fi
92-
9389
if [[ $commands == *"pytest -v -s compile/test_basic_correctness.py"* ]]; then
9490
commands=${commands//"pytest -v -s compile/test_basic_correctness.py"/"VLLM_USE_TRITON_FLASH_ATTN=0 pytest -v -s compile/test_basic_correctness.py"}
9591
fi

.buildkite/test-pipeline.yaml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ steps:
110110
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
111111
- pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py
112112
- pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
113-
- VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
113+
- pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
114114

115115
- label: Entrypoints Integration Test (API Server) # 100min
116116
timeout_in_minutes: 130
@@ -163,7 +163,6 @@ steps:
163163
- tests/v1/engine/test_engine_core_client.py
164164
commands:
165165
# test with tp=2 and external_dp=2
166-
- VLLM_USE_V1=0 torchrun --nproc-per-node=4 distributed/test_torchrun_example.py
167166
- torchrun --nproc-per-node=4 distributed/test_torchrun_example.py
168167
# test with tp=2 and pp=2
169168
- PP_SIZE=2 torchrun --nproc-per-node=4 distributed/test_torchrun_example.py
@@ -314,12 +313,11 @@ steps:
314313
- python3 offline_inference/vision_language.py --seed 0
315314
- python3 offline_inference/vision_language_pooling.py --seed 0
316315
- python3 offline_inference/vision_language_multi_image.py --seed 0
317-
- VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
316+
- python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
318317
- python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
319318
- python3 offline_inference/basic/classify.py
320319
- python3 offline_inference/basic/embed.py
321320
- python3 offline_inference/basic/score.py
322-
- VLLM_USE_V1=0 python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2
323321

324322
- label: Platform Tests (CUDA) # 4min
325323
timeout_in_minutes: 15
@@ -894,7 +892,7 @@ steps:
894892
- pytest -v -s distributed/test_sequence_parallel.py
895893
# this test fails consistently.
896894
# TODO: investigate and fix
897-
- VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
895+
- CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
898896
- CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
899897
- pytest -v -s models/multimodal/generation/test_maverick.py
900898

.github/CODEOWNERS

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
/vllm/attention @LucasWilkinson
66
/vllm/attention/backends/abstract.py @WoosukKwon @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill
77
/vllm/core @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill
8-
/vllm/engine/llm_engine.py @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill
98
/vllm/executor/executor_base.py @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill @22quinn
109
/vllm/worker/worker_base.py @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill @22quinn
1110
/vllm/worker/worker.py @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill

0 commit comments

Comments (0)