Skip to content

Commit 95e3f2a

Browse files
authored
[Tests] Set up pytest markers (#108)
* Add markers and refactors to use pytest params * Fix utils marker and add worker * Fix util tests and add backend one * Skip eager for multi-card tests * Dynamically mark all tests in tests/e2e * Add decoder marker to gptq * Split GPTQ test for readability --------- Signed-off-by: Rafael Vasquez <rafvasq21@gmail.com>
1 parent 7c7dc3a commit 95e3f2a

16 files changed

Lines changed: 156 additions & 74 deletions

.github/workflows/test-spyre.yml

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,16 @@ jobs:
4444
repo: "git+https://github.com/vllm-project/vllm --branch main"
4545
test_suite:
4646
- name: "V0-e2e"
47-
tests: "V0 and eager"
47+
markers: "v0 and cpu and e2e"
4848
flags: "--timeout=300"
4949
- name: "V1-e2e"
50-
tests: "V1- and eager"
50+
markers: "v1 and cpu and e2e"
5151
flags: "--timeout=300 --forked"
5252
- name: "V1-worker"
53-
tests: "test_sampling_metadata_in_input_batch"
53+
markers: "v1 and not e2e"
54+
flags: "--timeout=300"
55+
- name: "utils"
56+
markers: "utils"
5457
flags: "--timeout=300"
5558

5659
name: "${{ matrix.test_suite.name }} (${{ matrix.vllm_version.name }})"
@@ -145,4 +148,4 @@ jobs:
145148
# re-install the vllm_spyre package from source
146149
source .venv/bin/activate
147150
python3 -m pytest ${{ matrix.test_suite.flags }} \
148-
tests -v -k "${{ matrix.test_suite.tests }}"
151+
tests -v -m "${{ matrix.test_suite.markers }}"

pyproject.toml

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -113,13 +113,17 @@ skip_gitignore = true
113113
pythonpath = ["."]
114114
markers = [
115115
"skip_global_cleanup",
116-
"core_model: enable this model test in each PR instead of only nightly",
117-
"cpu_model: enable this model test in CPU tests",
118-
"quant_model: run this model test under Quantized category",
119-
"split: run this test as part of a split",
120-
"distributed: run this test only in distributed GPU tests",
121-
"skip_v1: do not run this test with v1",
122-
"optional: optional tests that are automatically skipped, include --optional to run them",
116+
"e2e: Tests using end-to-end engine spin-up",
117+
"v0: Tests using vLLM v0 engine",
118+
"v1: Tests using vLLM v1 engine",
119+
"cpu: Tests using CPU (i.e. eager) backend",
120+
"spyre: Tests using Spyre hardware backend",
121+
"decoder: Tests for decoder models",
122+
"embedding: Tests for embedding models",
123+
"quantized: Tests for quantized models",
124+
"multi: Tests that require >1 cards",
125+
"utils: Tests for utility functions",
126+
"worker: Tests for worker logic",
123127
]
124128

125129
[tool.pymarkdown]

tests/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@
55
from vllm.distributed import cleanup_dist_env_and_memory
66

77

8+
def pytest_collection_modifyitems(config, items):
9+
""" Mark all tests in e2e directory"""
10+
for item in items:
11+
if "tests/e2e" in str(item.nodeid):
12+
item.add_marker(pytest.mark.e2e)
13+
14+
815
@pytest.fixture(params=[True, False])
916
def run_with_both_engines(request, monkeypatch):
1017
# Automatically runs tests twice, once with V1 and once without

tests/e2e/test_spyre_basic.py

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"""
55

66
import pytest
7-
from spyre_util import (compare_results, generate_hf_output,
7+
from spyre_util import (VLLM_VERSIONS, compare_results, generate_hf_output,
88
generate_spyre_vllm_output, get_spyre_backend_list,
99
get_spyre_model_list)
1010
from vllm import SamplingParams
@@ -15,24 +15,6 @@
1515
"user.\n\n### Instruction:\n{}\n\n### Response:")
1616

1717

18-
# Basic test to make sure we return the model_list correctly
19-
def test_get_spyre_model_list(monkeypatch):
20-
with monkeypatch.context() as m:
21-
m.setenv("VLLM_SPYRE_TEST_MODEL_DIR", "models")
22-
m.setenv("VLLM_SPYRE_TEST_MODEL_LIST", "llama-194m, " \
23-
"all-roberta-large-v1")
24-
assert get_spyre_model_list()[0] == "models/llama-194m"
25-
assert get_spyre_model_list()[1] == \
26-
"models/all-roberta-large-v1"
27-
28-
with monkeypatch.context() as m:
29-
m.setenv("VLLM_SPYRE_TEST_MODEL_DIR", "")
30-
m.setenv("VLLM_SPYRE_TEST_MODEL_LIST", "llama-194m, " \
31-
"all-roberta-large-v1")
32-
assert get_spyre_model_list()[0] == "llama-194m"
33-
assert get_spyre_model_list()[1] == "all-roberta-large-v1"
34-
35-
3618
@pytest.mark.parametrize("model", get_spyre_model_list())
3719
@pytest.mark.parametrize("prompts", [[
3820
template.format("Provide a list of instructions "
@@ -47,7 +29,7 @@ def test_get_spyre_model_list(monkeypatch):
4729
"warmup_shape", [(64, 20, 4), (64, 20, 8), (128, 20, 4),
4830
(128, 20, 8)]) # (prompt_length/new_tokens/batch_size)
4931
@pytest.mark.parametrize("backend", get_spyre_backend_list())
50-
@pytest.mark.parametrize("vllm_version", ["V0", "V1"])
32+
@pytest.mark.parametrize("vllm_version", VLLM_VERSIONS)
5133
def test_output(
5234
model: str,
5335
prompts: list[str],
@@ -102,7 +84,7 @@ def test_output(
10284

10385
@pytest.mark.parametrize("model", get_spyre_model_list())
10486
@pytest.mark.parametrize("backend", get_spyre_backend_list())
105-
@pytest.mark.parametrize("vllm_version", ["V0", "V1"])
87+
@pytest.mark.parametrize("vllm_version", VLLM_VERSIONS)
10688
def test_batch_handling(
10789
model: str,
10890
backend: str,

tests/e2e/test_spyre_embeddings.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,10 @@
2020
[(64, 4), (64, 8), (128, 4),
2121
(128, 8)]) # (prompt_length/batch_size)
2222
@pytest.mark.parametrize("backend", get_spyre_backend_list())
23-
@pytest.mark.parametrize("vllm_version", ["V0"]) # Todo: V1 support
23+
@pytest.mark.parametrize(
24+
"vllm_version",
25+
[pytest.param("V0", marks=pytest.mark.v0, id="v0")
26+
]) # TODO: Replace with VLLM_VERSIONS when v1 is supported.
2427
def test_output(
2528
model: str,
2629
prompts: list[str],

tests/e2e/test_spyre_max_new_tokens.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"""
55

66
import pytest
7-
from spyre_util import (compare_results, generate_hf_output,
7+
from spyre_util import (VLLM_VERSIONS, compare_results, generate_hf_output,
88
generate_spyre_vllm_output, get_spyre_backend_list,
99
get_spyre_model_list)
1010
from vllm import SamplingParams
@@ -27,7 +27,7 @@
2727
@pytest.mark.parametrize(
2828
"warmup_shape", [(64, 10, 4)]) # (prompt_length/new_tokens/batch_size)
2929
@pytest.mark.parametrize("backend", get_spyre_backend_list())
30-
@pytest.mark.parametrize("vllm_version", ["V0", "V1"])
30+
@pytest.mark.parametrize("vllm_version", VLLM_VERSIONS)
3131
def test_output(
3232
model: str,
3333
prompts: list[str],

tests/e2e/test_spyre_online.py

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,18 @@
11
import openai
22
import pytest
33

4-
from tests.spyre_util import get_spyre_backend_list, get_spyre_model_list
4+
from tests.spyre_util import (VLLM_VERSIONS, get_spyre_backend_list,
5+
get_spyre_model_list)
56

67

7-
def get_test_combinations():
8-
combinations = []
9-
10-
# Base model tests across all backends
11-
for backend in get_spyre_backend_list():
12-
for model in get_spyre_model_list():
13-
combinations.append((model, backend, None))
14-
15-
# GPTQ model only tests on sendnn_decoder
16-
for model in get_spyre_model_list(quantization="gptq"):
17-
combinations.append((model, "sendnn_decoder", "gptq"))
18-
19-
return combinations
20-
21-
22-
@pytest.mark.parametrize("model,backend,quantization", get_test_combinations())
8+
@pytest.mark.parametrize("model", get_spyre_model_list())
9+
@pytest.mark.parametrize("backend", get_spyre_backend_list())
2310
@pytest.mark.parametrize("warmup_shape", [[
2411
(64, 20, 4),
2512
]])
26-
@pytest.mark.parametrize("vllm_version", ["V0", "V1"])
13+
@pytest.mark.parametrize("vllm_version", VLLM_VERSIONS)
2714
def test_openai_serving(remote_openai_server, model, warmup_shape, backend,
28-
vllm_version, quantization):
15+
vllm_version):
2916
"""Test online serving using the `vllm serve` CLI"""
3017

3118
client = remote_openai_server.get_client()
@@ -68,3 +55,29 @@ def test_openai_serving(remote_openai_server, model, warmup_shape, backend,
6855
assert len(completion.choices[0].text) == 0
6956
except openai.BadRequestError as e:
7057
assert "warmup" in str(e)
58+
59+
60+
@pytest.mark.parametrize("model", get_spyre_model_list(quantization="gptq"))
61+
@pytest.mark.parametrize("backend", ["sendnn_decoder"])
62+
@pytest.mark.parametrize("quantization", ["gptq"])
63+
@pytest.mark.parametrize("warmup_shape", [[(64, 20, 4)]])
64+
@pytest.mark.parametrize("vllm_version", VLLM_VERSIONS)
65+
def test_openai_serving_gptq(remote_openai_server, model, backend,
66+
warmup_shape, vllm_version, quantization):
67+
"""Test online serving a GPTQ model with the sendnn_decoder backend only"""
68+
69+
client = remote_openai_server.get_client()
70+
completion = client.completions.create(model=model,
71+
prompt="Hello World!",
72+
max_tokens=5,
73+
temperature=0.0)
74+
assert len(completion.choices) == 1
75+
assert len(completion.choices[0].text) > 0
76+
77+
completion = client.completions.create(model=model,
78+
prompt="Hello World!",
79+
max_tokens=5,
80+
temperature=1.0,
81+
n=2)
82+
assert len(completion.choices) == 2
83+
assert len(completion.choices[0].text) > 0

tests/e2e/test_spyre_online_multi.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
11
import pytest
22

3-
from tests.spyre_util import get_spyre_backend_list, get_spyre_model_list
3+
from tests.spyre_util import (VLLM_VERSIONS, get_spyre_backend_list,
4+
get_spyre_model_list)
45

56

7+
@pytest.mark.multi
68
@pytest.mark.parametrize("model", get_spyre_model_list())
79
@pytest.mark.parametrize("warmup_shape", [[
810
(64, 20, 4),
911
]])
10-
@pytest.mark.parametrize("backend", get_spyre_backend_list())
12+
@pytest.mark.parametrize(
13+
"backend", [b for b in get_spyre_backend_list() if "eager" not in str(b)])
1114
@pytest.mark.parametrize("tensor_parallel_size", ["2"])
12-
@pytest.mark.parametrize("vllm_version", ["V0", "V1"])
15+
@pytest.mark.parametrize("vllm_version", VLLM_VERSIONS)
1316
def test_openai_tp_serving(remote_openai_server, model, warmup_shape, backend,
1417
vllm_version, tensor_parallel_size):
1518
"""Test online serving with tensor parallelism using the `vllm serve` CLI"""

tests/e2e/test_spyre_seed.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
import math
77

88
import pytest
9-
from spyre_util import (generate_spyre_vllm_output, get_spyre_backend_list,
10-
get_spyre_model_list)
9+
from spyre_util import (VLLM_VERSIONS, generate_spyre_vllm_output,
10+
get_spyre_backend_list, get_spyre_model_list)
1111
from vllm import SamplingParams
1212

1313

@@ -22,7 +22,7 @@
2222
"warmup_shape", [(64, 20, 4), (64, 20, 8), (128, 20, 4),
2323
(128, 20, 8)]) # (prompt_length/new_tokens/batch_size)
2424
@pytest.mark.parametrize("backend", get_spyre_backend_list())
25-
@pytest.mark.parametrize("vllm_version", ["V0", "V1"])
25+
@pytest.mark.parametrize("vllm_version", VLLM_VERSIONS)
2626
def test_seed(
2727
model: str,
2828
prompt: str,

tests/e2e/test_spyre_static_batching_limits.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
(128, 20, 4)]]) # (prompt_length/new_tokens/batch_size)
1717
@pytest.mark.parametrize("backend", get_spyre_backend_list())
1818
@pytest.mark.parametrize("vllm_version",
19-
["V1"]) # v0 doesn't support multiple shapes
19+
[pytest.param("V1", marks=pytest.mark.v1, id="v1")
20+
]) # v0 doesn't support multiple shapes
2021
def test_max_prompt_len_and_new_tokens(model: str,
2122
warmup_shapes: list[tuple[int, int,
2223
int]],

0 commit comments

Comments
 (0)