99# label(str): the name of the test. emoji allowed.
1010# fast_check(bool): whether to run this on each commit on fastcheck pipeline.
1111# fast_check_only(bool): run this test on fastcheck pipeline only
12+ # optional(bool): never run this test by default (i.e. need to unblock manually)
1213# command(str): the single command to run for tests. incompatible with commands.
1314# commands(list): the list of commands to run for test. incompatible with command.
1415# mirror_hardwares(list): the list of hardwares to run the test on as well. currently only supports [amd]
3940 # Check API reference (if it fails, you may have missing mock imports)
4041 - grep \"sig sig-object py\" build/html/dev/sampling_params.html
4142
42- - label : Async Engine, Inputs, Utils, Worker Test # 15min
43+ - label : Async Engine, Inputs, Utils, Worker Test # 24min
4344 fast_check : true
4445 source_file_dependencies :
4546 - vllm/
8182 commands :
8283 - pytest -v -s core
8384
84- - label : Entrypoints Test # 20min
85+ - label : Entrypoints Test # 40min
8586 working_dir : " /vllm-workspace/tests"
8687 fast_check : true
8788 mirror_hardwares : [amd]
@@ -151,7 +152,7 @@ steps:
151152 # OOM in the CI unless we run this separately
152153 - pytest -v -s tokenization
153154
154- - label : Examples Test # 12min
155+ - label : Examples Test # 15min
155156 working_dir : " /vllm-workspace/examples"
156157 # mirror_hardwares: [amd]
157158 source_file_dependencies :
@@ -169,15 +170,15 @@ steps:
169170 - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
170171 - python3 offline_inference_encoder_decoder.py
171172
172- - label : Prefix Caching Test # 7min
173+ - label : Prefix Caching Test # 9min
173174 # mirror_hardwares: [amd]
174175 source_file_dependencies :
175176 - vllm/
176177 - tests/prefix_caching
177178 commands :
178179 - pytest -v -s prefix_caching
179180
180- - label : Samplers Test # 18min
181+ - label : Samplers Test # 36min
181182 source_file_dependencies :
182183 - vllm/model_executor/layers
183184 - vllm/sampling_metadata.py
@@ -193,7 +194,7 @@ steps:
193194 - tests/test_logits_processor
194195 command : pytest -v -s test_logits_processor.py
195196
196- - label : Speculative decoding tests # 22min
197+ - label : Speculative decoding tests # 30min
197198 source_file_dependencies :
198199 - vllm/spec_decode
199200 - tests/spec_decode
@@ -203,30 +204,30 @@ steps:
203204 - pytest -v -s spec_decode/e2e/test_multistep_correctness.py
204205 - pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py
205206
206- - label : LoRA Test %N # 30min each
207+ - label : LoRA Test %N # 15min each
207208 mirror_hardwares : [amd]
208209 source_file_dependencies :
209210 - vllm/lora
210211 - tests/lora
211212 command : pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_long_context.py
212213 parallelism : 4
213214
214- - label : " PyTorch Fullgraph Smoke Test"
215+ - label : " PyTorch Fullgraph Smoke Test" # 9min
215216 fast_check : true
216217 source_file_dependencies :
217218 - vllm/
218219 - tests/compile
219220 commands :
220221 - pytest -v -s compile/test_full_graph_smoke.py
221222
222- - label : " PyTorch Fullgraph Test"
223+ - label : " PyTorch Fullgraph Test" # 18min
223224 source_file_dependencies :
224225 - vllm/
225226 - tests/compile
226227 commands :
227228 - pytest -v -s compile/test_full_graph.py
228229
229- - label : Kernels Test %N # 30min each
230+ - label : Kernels Test %N # 1h each
230231 mirror_hardwares : [amd]
231232 source_file_dependencies :
232233 - csrc/
@@ -256,7 +257,7 @@ steps:
256257 - pip install aiohttp
257258 - bash run-benchmarks.sh
258259
259- - label : Quantization Test # 15min
260+ - label : Quantization Test # 33min
260261 source_file_dependencies :
261262 - csrc/
262263 - vllm/model_executor/layers/quantization
@@ -300,15 +301,15 @@ steps:
300301 - pytest -v -s models/test_oot_registration.py # it needs a clean process
301302 - pytest -v -s models/*.py --ignore=models/test_oot_registration.py
302303
303- - label : Decoder-only Language Models Test # 1h3min
304+ - label : Decoder-only Language Models Test # 1h36min
304305 # mirror_hardwares: [amd]
305306 source_file_dependencies :
306307 - vllm/
307308 - tests/models/decoder_only/language
308309 commands :
309310 - pytest -v -s models/decoder_only/language
310311
311- - label : Decoder-only Multi-Modal Models Test # 56min
312+ - label : Decoder-only Multi-Modal Models Test # 1h31min
312313 # mirror_hardwares: [amd]
313314 source_file_dependencies :
314315 - vllm/
@@ -318,15 +319,25 @@ steps:
318319 - pytest -v -s models/decoder_only/audio_language
319320 - pytest -v -s models/decoder_only/vision_language
320321
321- - label : Other Models Test # 5min
322+ - label : Other Models Test # 6min
322323 # mirror_hardwares: [amd]
323324 source_file_dependencies :
324325 - vllm/
325326 - tests/models/embedding/language
326327 - tests/models/encoder_decoder/language
328+ - tests/models/encoder_decoder/vision_language
327329 commands :
328330 - pytest -v -s models/embedding/language
329331 - pytest -v -s models/encoder_decoder/language
332+ - pytest -v -s models/encoder_decoder/vision_language
333+
334+ - label : Custom Models Test
335+ # mirror_hardwares: [amd]
336+ optional : true
337+ commands :
338+ # PR authors can temporarily add commands below to test individual models
339+ # e.g. pytest -v -s models/encoder_decoder/vision_language/test_mllama.py
340+ # *To avoid merge conflicts, remember to REMOVE (not just comment out) them before merging the PR*
330341
331342# #### 1 GPU test #####
332343# #### multi gpus test #####
@@ -359,7 +370,7 @@ steps:
359370 - # the following commands are for the second node, with ip 192.168.10.11 (ray environment already set up)
360371 - VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep -q 'Same node test passed'
361372
362- - label : Distributed Tests (2 GPUs) # 28min
373+ - label : Distributed Tests (2 GPUs) # 40min
363374 # mirror_hardwares: [amd]
364375 working_dir : " /vllm-workspace/tests"
365376 num_gpus : 2
@@ -376,14 +387,16 @@ steps:
376387 - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep -q 'Same node test passed'
377388 - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m distributed_2_gpus
378389 # Avoid importing model tests that cause CUDA reinitialization error
379- - pytest models/encoder_decoder/language/test_bart.py models/decoder_only/vision_language/test_broadcast.py -v -s -m distributed_2_gpus
390+ - pytest models/encoder_decoder/language/test_bart.py -v -s -m distributed_2_gpus
391+ - pytest models/encoder_decoder/vision_language/test_broadcast.py -v -s -m distributed_2_gpus
392+ - pytest models/decoder_only/vision_language/test_broadcast.py -v -s -m distributed_2_gpus
380393 - pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py
381394 - pip install -e ./plugins/vllm_add_dummy_model
382395 - pytest -v -s distributed/test_distributed_oot.py
383396 - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
384397 - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s distributed/test_utils.py
385398
386- - label : Multi-step Tests (4 GPUs) # 21min
399+ - label : Multi-step Tests (4 GPUs) # 36min
387400 working_dir : " /vllm-workspace/tests"
388401 num_gpus : 4
389402 source_file_dependencies :
@@ -401,7 +414,7 @@ steps:
401414 - pytest -v -s multi_step/test_correctness_async_llm.py
402415 - pytest -v -s multi_step/test_correctness_llm.py
403416
404- - label : Pipeline Parallelism Test # 23min
417+ - label : Pipeline Parallelism Test # 45min
405418 working_dir : " /vllm-workspace/tests"
406419 num_gpus : 4
407420 source_file_dependencies :
@@ -427,7 +440,7 @@ steps:
427440 - export VLLM_WORKER_MULTIPROC_METHOD=spawn
428441 - pytest -v -s -x lora/test_long_context.py
429442
430- - label : Weight Loading Multiple GPU Test
443+ - label : Weight Loading Multiple GPU Test # 33min
431444 working_dir : " /vllm-workspace/tests"
432445 num_gpus : 2
433446 source_file_dependencies :
0 commit comments