Skip to content

Commit afb4429

Browse files
[CI/Build] Reorganize models tests (#17459)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent aa4502e commit afb4429

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

65 files changed

+317
-324
lines changed

.buildkite/test-pipeline.yaml

Lines changed: 38 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -390,12 +390,15 @@ steps:
390390
commands:
391391
- pytest -v -s benchmarks/
392392

393-
- label: Quantization Test # 33min
393+
- label: Quantization Test
394394
source_file_dependencies:
395395
- csrc/
396396
- vllm/model_executor/layers/quantization
397397
- tests/quantization
398-
command: VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization
398+
- tests/models/quantization
399+
commands:
400+
- VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization
401+
- pytest -v -s models/quantization
399402

400403
- label: LM Eval Small Models # 53min
401404
working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
@@ -441,82 +444,70 @@ steps:
441444
commands:
442445
- pytest -v -s models/test_transformers.py
443446
- pytest -v -s models/test_registry.py
447+
- pytest -v -s models/test_utils.py
448+
- pytest -v -s models/test_vision.py
444449
# V1 Test: https://github.com/vllm-project/vllm/issues/14531
445450
- VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2'
446451
- VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'llama4'
447452
- VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'plamo2'
448453

449-
- label: Language Models Test (Standard) # 32min
454+
- label: Language Models Test (Standard)
450455
#mirror_hardwares: [amd]
451456
source_file_dependencies:
452457
- vllm/
453-
- tests/models/decoder_only/language
454-
- tests/models/embedding/language
455-
- tests/models/encoder_decoder/language
458+
- tests/models/language
456459
commands:
457460
# Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
458461
- pip install 'git+https://github.com/Dao-AILab/[email protected]'
459-
- pytest -v -s models/decoder_only/language -m 'core_model or quant_model'
460-
- pytest -v -s models/embedding/language -m core_model
462+
- pytest -v -s models/language -m core_model
461463

462-
- label: Language Models Test (Extended) # 1h10min
464+
- label: Language Models Test (Extended)
463465
optional: true
464466
source_file_dependencies:
465467
- vllm/
466-
- tests/models/decoder_only/language
467-
- tests/models/embedding/language
468-
- tests/models/encoder_decoder/language
468+
- tests/models/language
469469
commands:
470470
# Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
471-
- pip install causal-conv1d
472-
- pytest -v -s models/decoder_only/language -m 'not core_model and not quant_model'
473-
- pytest -v -s models/embedding/language -m 'not core_model'
471+
- pip install 'git+https://github.com/Dao-AILab/[email protected]'
472+
- pytest -v -s models/language -m 'not core_model'
474473

475-
- label: Multi-Modal Models Test (Standard) # 40min
474+
- label: Multi-Modal Models Test (Standard)
476475
#mirror_hardwares: [amd]
477476
source_file_dependencies:
478477
- vllm/
479-
- tests/models/decoder_only/audio_language
480-
- tests/models/decoder_only/vision_language
481-
- tests/models/embedding/vision_language
482-
- tests/models/encoder_decoder/audio_language
483-
- tests/models/encoder_decoder/vision_language
478+
- tests/models/multimodal
479+
commands:
480+
- pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
481+
- pytest -v -s models/multimodal/processing
482+
- pytest -v -s --ignore models/multimodal/generation/test_whisper.py models/multimodal -m core_model
483+
- cd .. && pytest -v -s tests/models/multimodal/generation/test_whisper.py -m core_model # Otherwise, mp_method="spawn" doesn't work
484+
485+
- label: Multi-Modal Models Test (Extended) 1
486+
optional: true
487+
source_file_dependencies:
488+
- vllm/
489+
- tests/models/multimodal
484490
commands:
485491
- pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
486-
- pytest -v -s models/multimodal
487-
- pytest -v -s models/decoder_only/audio_language -m 'core_model or quant_model'
488-
- pytest -v -s models/decoder_only/vision_language -m 'core_model or quant_model'
489-
- pytest -v -s models/embedding/vision_language -m core_model
490-
- pytest -v -s models/encoder_decoder/audio_language -m core_model
491-
- pytest -v -s models/encoder_decoder/language -m core_model
492-
- pytest -v -s models/encoder_decoder/vision_language -m core_model
493-
- pytest -v -s models/decoder_only/vision_language/test_interleaved.py
494-
495-
- label: Multi-Modal Models Test (Extended) 1 # 48m
492+
- pytest -v -s --ignore models/multimodal/generation/test_common.py --ignore models/multimodal/processing models/multimodal -m 'not core_model'
493+
494+
- label: Multi-Modal Models Test (Extended) 2
496495
optional: true
497496
source_file_dependencies:
498497
- vllm/
499-
- tests/models/decoder_only/audio_language
500-
- tests/models/decoder_only/vision_language
501-
- tests/models/embedding/vision_language
502-
- tests/models/encoder_decoder/vision_language
498+
- tests/models/multimodal
503499
commands:
504500
- pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
505-
- pytest -v -s models/decoder_only/audio_language -m 'not core_model and not quant_model'
506-
- pytest -v -s models/decoder_only/vision_language/test_models.py -m 'split(group=0) and not core_model and not quant_model'
507-
- pytest -v -s --ignore models/decoder_only/vision_language/test_models.py models/decoder_only/vision_language -m 'not core_model and not quant_model'
508-
- pytest -v -s models/embedding/vision_language -m 'not core_model'
509-
- pytest -v -s models/encoder_decoder/language -m 'not core_model'
510-
- pytest -v -s models/encoder_decoder/vision_language -m 'not core_model'
511-
512-
- label: Multi-Modal Models Test (Extended) 2 # 38m
501+
- pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=0) and not core_model'
502+
503+
- label: Multi-Modal Models Test (Extended) 3
513504
optional: true
514505
source_file_dependencies:
515506
- vllm/
516-
- tests/models/decoder_only/vision_language
507+
- tests/models/multimodal
517508
commands:
518509
- pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
519-
- pytest -v -s models/decoder_only/vision_language/test_models.py -m 'split(group=1) and not core_model and not quant_model'
510+
- pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model'
520511

521512
# This test is used only in PR development phase to test individual models and should never run on main
522513
- label: Custom Models Test
@@ -586,9 +577,8 @@ steps:
586577
- TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)'
587578
# Avoid importing model tests that cause CUDA reinitialization error
588579
- pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)'
589-
- pytest models/encoder_decoder/language/test_bart.py -v -s -m 'distributed(num_gpus=2)'
590-
- pytest models/encoder_decoder/vision_language/test_broadcast.py -v -s -m 'distributed(num_gpus=2)'
591-
- pytest models/decoder_only/vision_language/test_models.py -v -s -m 'distributed(num_gpus=2)'
580+
- pytest models/language -v -s -m 'distributed(num_gpus=2)'
581+
- pytest models/multimodal -v -s -m 'distributed(num_gpus=2)'
592582
# test sequence parallel
593583
- pytest -v -s distributed/test_sequence_parallel.py
594584
# this test fails consistently.

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -158,7 +158,6 @@ markers = [
158158
"skip_global_cleanup",
159159
"core_model: enable this model test in each PR instead of only nightly",
160160
"cpu_model: enable this model test in CPU tests",
161-
"quant_model: run this model test under Quantized category",
162161
"split: run this test as part of a split",
163162
"distributed: run this test only in distributed GPU tests",
164163
"skip_v1: do not run this test with v1",

tests/entrypoints/openai/test_embedding.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
from vllm.entrypoints.openai.protocol import EmbeddingResponse
1212
from vllm.transformers_utils.tokenizer import get_tokenizer
1313

14-
from ...models.embedding.utils import correctness_test
14+
from ...models.utils import run_embedding_correctness_test
1515
from ...utils import RemoteOpenAIServer
1616

1717
MODEL_NAME = "intfloat/multilingual-e5-small"
@@ -76,7 +76,7 @@ async def test_single_embedding(hf_model, client: openai.AsyncOpenAI,
7676
assert embeddings.usage.total_tokens == 11
7777

7878
vllm_outputs = [d.embedding for d in embeddings.data]
79-
correctness_test(hf_model, input_texts, vllm_outputs)
79+
run_embedding_correctness_test(hf_model, input_texts, vllm_outputs)
8080

8181
# test using token IDs
8282
input_tokens = [1, 1, 1, 1, 1]
@@ -121,7 +121,7 @@ async def test_batch_embedding(hf_model, client: openai.AsyncOpenAI,
121121
assert embeddings.usage.total_tokens == 33
122122

123123
vllm_outputs = [d.embedding for d in embeddings.data]
124-
correctness_test(hf_model, input_texts, vllm_outputs)
124+
run_embedding_correctness_test(hf_model, input_texts, vllm_outputs)
125125

126126
# test list[list[int]]
127127
input_tokens = [[4, 5, 7, 9, 20], [15, 29, 499], [24, 24, 24, 24, 24],
@@ -208,7 +208,7 @@ async def test_batch_base64_embedding(hf_model, client: openai.AsyncOpenAI,
208208
model=model_name,
209209
encoding_format="float")
210210
float_data = [d.embedding for d in responses_float.data]
211-
correctness_test(hf_model, input_texts, float_data)
211+
run_embedding_correctness_test(hf_model, input_texts, float_data)
212212

213213
responses_base64 = await client.embeddings.create(input=input_texts,
214214
model=model_name,
@@ -219,13 +219,13 @@ async def test_batch_base64_embedding(hf_model, client: openai.AsyncOpenAI,
219219
np.frombuffer(base64.b64decode(data.embedding),
220220
dtype="float32").tolist())
221221

222-
correctness_test(hf_model, input_texts, base64_data)
222+
run_embedding_correctness_test(hf_model, input_texts, base64_data)
223223

224224
# Default response is float32 decoded from base64 by OpenAI Client
225225
responses_default = await client.embeddings.create(input=input_texts,
226226
model=model_name)
227227
default_data = [d.embedding for d in responses_default.data]
228-
correctness_test(hf_model, input_texts, default_data)
228+
run_embedding_correctness_test(hf_model, input_texts, default_data)
229229

230230

231231
@pytest.mark.asyncio

tests/entrypoints/openai/test_embedding_dimensions.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
from vllm.entrypoints.openai.protocol import EmbeddingResponse
1212

1313
from ...conftest import HfRunner
14-
from ...models.embedding.utils import EmbedModelInfo, correctness_test
14+
from ...models.utils import EmbedModelInfo, run_embedding_correctness_test
1515
from ...utils import RemoteOpenAIServer
1616

1717
MODELS = [
@@ -95,7 +95,8 @@ async def make_request_and_correctness_test(dimensions):
9595
assert len(embeddings.data[0].embedding) == dimensions
9696

9797
vllm_outputs = [d.embedding for d in embeddings.data]
98-
correctness_test(hf_model, prompts, vllm_outputs, dimensions)
98+
run_embedding_correctness_test(hf_model, prompts, vllm_outputs,
99+
dimensions)
99100

100101
if model_info.is_matryoshka:
101102
valid_dimensions: list[Optional[int]] = [None]

tests/models/embedding/utils.py

Lines changed: 0 additions & 66 deletions
This file was deleted.

tests/models/embedding/vision_language/__init__.py

Whitespace-only changes.

tests/models/encoder_decoder/__init__.py

Whitespace-only changes.

tests/models/encoder_decoder/audio_language/__init__.py

Whitespace-only changes.

tests/models/encoder_decoder/language/__init__.py

Whitespace-only changes.

tests/models/encoder_decoder/vision_language/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)