Skip to content

Commit 53b681e

Browse files
authored
Merge branch 'main' into main
2 parents 23d72c8 + 5cdd664 commit 53b681e

File tree

36 files changed

+889
-176
lines changed

36 files changed

+889
-176
lines changed

.buildkite/test-amd.yaml

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,7 @@ steps:
715715
# we can only upgrade after this is resolved
716716
# TODO(jerryzh168): resolve the above comment
717717
- uv pip install --system torchao==0.13.0
718+
- uv pip install --system conch-triton-kernels
718719
- VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ --ignore quantization/test_blackwell_moe.py
719720

720721
- label: LM Eval Small Models # 15min
@@ -934,6 +935,18 @@ steps:
934935
commands:
935936
- pytest -v -s models/language/pooling_mteb_test
936937

938+
- label: Multi-Modal Processor Test (CPU)
939+
timeout_in_minutes: 60
940+
mirror_hardwares: [amdexperimental]
941+
agent_pool: mi325_1
942+
source_file_dependencies:
943+
- vllm/
944+
- tests/models/multimodal
945+
no_gpu: true
946+
commands:
947+
- pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
948+
- pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py
949+
937950
- label: Multi-Modal Processor Test # 44min
938951
timeout_in_minutes: 60
939952
mirror_hardwares: [amdexperimental]
@@ -1472,14 +1485,14 @@ steps:
14721485
working_dir: "/vllm-workspace/"
14731486
num_gpus: 2
14741487
commands:
1475-
- pytest -v -s tests/compile/distributed/test_async_tp.py
1488+
- VLLM_TEST_CLEAN_GPU_MEMORY=1 pytest -v -s tests/compile/distributed/test_async_tp.py
14761489
- pytest -v -s tests/compile/distributed/test_sequence_parallelism.py
14771490
- pytest -v -s tests/compile/distributed/test_fusion_all_reduce.py
14781491
#- pytest -v -s tests/compile/distributed/test_fusions_e2e.py::test_tp2_attn_quant_allreduce_rmsnorm
1479-
- "pytest -v -s tests/compile/distributed/test_fusions_e2e.py -k 'not Llama-4'"
1480-
- pytest -v -s tests/distributed/test_sequence_parallel.py
1492+
- "VLLM_TEST_CLEAN_GPU_MEMORY=1 pytest -v -s tests/compile/distributed/test_fusions_e2e.py -k 'not Llama-4'"
1493+
- VLLM_TEST_CLEAN_GPU_MEMORY=1 pytest -v -s tests/distributed/test_sequence_parallel.py
14811494
- pytest -v -s tests/distributed/test_context_parallel.py
1482-
- CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
1495+
- HIP_VISIBLE_DEVICES=0,1 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
14831496
- pytest -v -s tests/v1/distributed/test_dbo.py
14841497

14851498
##### B200 test #####

.github/workflows/cleanup_pr_body.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
1717

1818
- name: Set up Python
19-
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
19+
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
2020
with:
2121
python-version: '3.12'
2222

.github/workflows/pre-commit.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
runs-on: ubuntu-latest
1818
steps:
1919
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
20-
- uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
20+
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
2121
with:
2222
python-version: "3.12"
2323
- run: echo "::add-matcher::.github/workflows/matchers/actionlint.json"

docker/Dockerfile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,12 @@ RUN CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-') && \
364364
cuda-cudart-${CUDA_VERSION_DASH} \
365365
cuda-nvrtc-${CUDA_VERSION_DASH} \
366366
cuda-cuobjdump-${CUDA_VERSION_DASH} \
367-
libcublas-${CUDA_VERSION_DASH} && \
367+
# https://github.com/vllm-project/vllm/issues/29590
368+
libcurand-dev-${CUDA_VERSION_DASH} \
369+
libcublas-${CUDA_VERSION_DASH} \
370+
# Fixes nccl_allocator requiring nccl.h at runtime
371+
# https://github.com/vllm-project/vllm/blob/1336a1ea244fa8bfd7e72751cabbdb5b68a0c11a/vllm/distributed/device_communicators/pynccl_allocator.py#L22
372+
libnccl-dev && \
368373
rm -rf /var/lib/apt/lists/*
369374

370375
ARG PIP_INDEX_URL UV_INDEX_URL

examples/offline_inference/vision_language.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1801,7 +1801,10 @@ def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData:
18011801
engine_args = EngineArgs(
18021802
model=model_name,
18031803
max_model_len=4096,
1804-
hf_overrides={"architectures": ["Tarsier2ForConditionalGeneration"]},
1804+
hf_overrides={
1805+
"architectures": ["Tarsier2ForConditionalGeneration"],
1806+
"model_type": "tarsier2",
1807+
},
18051808
limit_mm_per_prompt={modality: 1},
18061809
)
18071810

examples/offline_inference/vision_language_multi_image.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1222,7 +1222,10 @@ def load_tarsier2(question: str, image_urls: list[str]) -> ModelRequestData:
12221222
trust_remote_code=True,
12231223
max_model_len=32768,
12241224
limit_mm_per_prompt={"image": len(image_urls)},
1225-
hf_overrides={"architectures": ["Tarsier2ForConditionalGeneration"]},
1225+
hf_overrides={
1226+
"architectures": ["Tarsier2ForConditionalGeneration"],
1227+
"model_type": "tarsier2",
1228+
},
12261229
)
12271230

12281231
prompt = (
Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
import json
4+
import tempfile
5+
from pathlib import Path
6+
7+
import pytest
8+
9+
from vllm.benchmarks.sweep.param_sweep import ParameterSweep, ParameterSweepItem
10+
11+
12+
class TestParameterSweepItem:
    """Check how ``ParameterSweepItem`` rewrites a command line.

    Every test builds an item with ``ParameterSweepItem.from_record`` and
    inspects the argument list returned by ``apply_to_cmd``.
    """

    @pytest.mark.parametrize(
        "input_dict,expected",
        [
            (
                {"compilation_config.use_inductor_graph_partition": False},
                "--compilation-config.use_inductor_graph_partition=false",
            ),
            (
                {"compilation_config.use_inductor_graph_partition": True},
                "--compilation-config.use_inductor_graph_partition=true",
            ),
            (
                {"compilation_config.use_inductor": False},
                "--compilation-config.use_inductor=false",
            ),
            (
                {"compilation_config.use_inductor": True},
                "--compilation-config.use_inductor=true",
            ),
        ],
    )
    def test_nested_boolean_params(self, input_dict, expected):
        """Dotted (nested) boolean keys are expected as ``--key=true/false``."""
        item = ParameterSweepItem.from_record(input_dict)
        cmd = item.apply_to_cmd(["vllm", "serve", "model"])
        assert expected in cmd

    @pytest.mark.parametrize(
        "input_dict,expected",
        [
            ({"enable_prefix_caching": False}, "--no-enable-prefix-caching"),
            ({"enable_prefix_caching": True}, "--enable-prefix-caching"),
            ({"disable_log_stats": False}, "--no-disable-log-stats"),
            ({"disable_log_stats": True}, "--disable-log-stats"),
        ],
    )
    def test_non_nested_boolean_params(self, input_dict, expected):
        """Top-level boolean keys are expected as ``--flag`` / ``--no-flag``."""
        item = ParameterSweepItem.from_record(input_dict)
        cmd = item.apply_to_cmd(["vllm", "serve", "model"])
        assert expected in cmd

    @pytest.mark.parametrize(
        "compilation_config",
        [
            {"cudagraph_mode": "full", "mode": 2, "use_inductor_graph_partition": True},
            {
                "cudagraph_mode": "piecewise",
                "mode": 3,
                "use_inductor_graph_partition": False,
            },
        ],
    )
    def test_nested_dict_value(self, compilation_config):
        """A dict value is expected as a JSON string right after its flag."""
        item = ParameterSweepItem.from_record(
            {"compilation_config": compilation_config}
        )
        cmd = item.apply_to_cmd(["vllm", "serve", "model"])
        assert "--compilation-config" in cmd
        # The argument following the flag must round-trip through JSON
        # back to the original dict.
        idx = cmd.index("--compilation-config")
        assert json.loads(cmd[idx + 1]) == compilation_config

    @pytest.mark.parametrize(
        "input_dict,expected_key,expected_value",
        [
            ({"model": "test-model"}, "--model", "test-model"),
            ({"max_tokens": 100}, "--max-tokens", "100"),
            ({"temperature": 0.7}, "--temperature", "0.7"),
        ],
    )
    def test_string_and_numeric_values(self, input_dict, expected_key, expected_value):
        """String and numeric values are emitted as ``--key value`` pairs."""
        item = ParameterSweepItem.from_record(input_dict)
        cmd = item.apply_to_cmd(["vllm", "serve"])
        assert expected_key in cmd
        # The value must directly follow its flag; checking plain membership
        # would also pass if the value landed anywhere else in the command.
        key_idx = cmd.index(expected_key)
        assert key_idx + 1 < len(cmd)
        assert cmd[key_idx + 1] == expected_value

    @pytest.mark.parametrize(
        "input_dict,expected_key,key_idx_offset",
        [
            ({"max_tokens": 200}, "--max-tokens", 1),
            ({"enable_prefix_caching": False}, "--no-enable-prefix-caching", 0),
        ],
    )
    def test_replace_existing_parameter(self, input_dict, expected_key, key_idx_offset):
        """A parameter already present in the command is replaced.

        ``key_idx_offset`` selects the scenario: ``1`` for a key/value pair
        (the value sits one slot after the flag), ``0`` for a boolean flag.
        """
        item = ParameterSweepItem.from_record(input_dict)

        if key_idx_offset == 1:
            # Key-value pair: the old value must be gone and the new one
            # must sit right after the flag.
            cmd = item.apply_to_cmd(["vllm", "serve", "--max-tokens", "100", "model"])
            assert expected_key in cmd
            idx = cmd.index(expected_key)
            assert cmd[idx + 1] == "200"
            assert "100" not in cmd
        else:
            # Boolean flag: the positive flag must be swapped for the
            # negated one.
            cmd = item.apply_to_cmd(
                ["vllm", "serve", "--enable-prefix-caching", "model"]
            )
            assert expected_key in cmd
            assert "--enable-prefix-caching" not in cmd
119+
120+
121+
class TestParameterSweep:
122+
"""Test ParameterSweep functionality."""
123+
124+
def test_from_records_list(self):
125+
"""Test creating ParameterSweep from a list of records."""
126+
records = [
127+
{"max_tokens": 100, "temperature": 0.7},
128+
{"max_tokens": 200, "temperature": 0.9},
129+
]
130+
sweep = ParameterSweep.from_records(records)
131+
assert len(sweep) == 2
132+
assert sweep[0]["max_tokens"] == 100
133+
assert sweep[1]["max_tokens"] == 200
134+
135+
def test_read_from_dict(self):
136+
"""Test creating ParameterSweep from a dict format."""
137+
data = {
138+
"experiment1": {"max_tokens": 100, "temperature": 0.7},
139+
"experiment2": {"max_tokens": 200, "temperature": 0.9},
140+
}
141+
sweep = ParameterSweep.read_from_dict(data)
142+
assert len(sweep) == 2
143+
144+
# Check that items have the _benchmark_name field
145+
names = {item["_benchmark_name"] for item in sweep}
146+
assert names == {"experiment1", "experiment2"}
147+
148+
# Check that parameters are preserved
149+
for item in sweep:
150+
if item["_benchmark_name"] == "experiment1":
151+
assert item["max_tokens"] == 100
152+
assert item["temperature"] == 0.7
153+
elif item["_benchmark_name"] == "experiment2":
154+
assert item["max_tokens"] == 200
155+
assert item["temperature"] == 0.9
156+
157+
def test_read_json_list_format(self):
158+
"""Test reading JSON file with list format."""
159+
records = [
160+
{"max_tokens": 100, "temperature": 0.7},
161+
{"max_tokens": 200, "temperature": 0.9},
162+
]
163+
164+
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
165+
json.dump(records, f)
166+
temp_path = Path(f.name)
167+
168+
try:
169+
sweep = ParameterSweep.read_json(temp_path)
170+
assert len(sweep) == 2
171+
assert sweep[0]["max_tokens"] == 100
172+
assert sweep[1]["max_tokens"] == 200
173+
finally:
174+
temp_path.unlink()
175+
176+
def test_read_json_dict_format(self):
177+
"""Test reading JSON file with dict format."""
178+
data = {
179+
"experiment1": {"max_tokens": 100, "temperature": 0.7},
180+
"experiment2": {"max_tokens": 200, "temperature": 0.9},
181+
}
182+
183+
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
184+
json.dump(data, f)
185+
temp_path = Path(f.name)
186+
187+
try:
188+
sweep = ParameterSweep.read_json(temp_path)
189+
assert len(sweep) == 2
190+
191+
# Check that items have the _benchmark_name field
192+
names = {item["_benchmark_name"] for item in sweep}
193+
assert names == {"experiment1", "experiment2"}
194+
finally:
195+
temp_path.unlink()
196+
197+
def test_unique_benchmark_names_validation(self):
198+
"""Test that duplicate _benchmark_name values raise an error."""
199+
# Test with duplicate names in list format
200+
records = [
201+
{"_benchmark_name": "exp1", "max_tokens": 100},
202+
{"_benchmark_name": "exp1", "max_tokens": 200},
203+
]
204+
205+
with pytest.raises(ValueError, match="Duplicate _benchmark_name values"):
206+
ParameterSweep.from_records(records)
207+
208+
def test_unique_benchmark_names_multiple_duplicates(self):
209+
"""Test validation with multiple duplicate names."""
210+
records = [
211+
{"_benchmark_name": "exp1", "max_tokens": 100},
212+
{"_benchmark_name": "exp1", "max_tokens": 200},
213+
{"_benchmark_name": "exp2", "max_tokens": 300},
214+
{"_benchmark_name": "exp2", "max_tokens": 400},
215+
]
216+
217+
with pytest.raises(ValueError, match="Duplicate _benchmark_name values"):
218+
ParameterSweep.from_records(records)
219+
220+
def test_no_benchmark_names_allowed(self):
221+
"""Test that records without _benchmark_name are allowed."""
222+
records = [
223+
{"max_tokens": 100, "temperature": 0.7},
224+
{"max_tokens": 200, "temperature": 0.9},
225+
]
226+
sweep = ParameterSweep.from_records(records)
227+
assert len(sweep) == 2
228+
229+
def test_mixed_benchmark_names_allowed(self):
230+
"""Test that mixing records with and without _benchmark_name is allowed."""
231+
records = [
232+
{"_benchmark_name": "exp1", "max_tokens": 100},
233+
{"max_tokens": 200, "temperature": 0.9},
234+
]
235+
sweep = ParameterSweep.from_records(records)
236+
assert len(sweep) == 2
237+
238+
239+
class TestParameterSweepItemKeyNormalization:
240+
"""Test key normalization in ParameterSweepItem."""
241+
242+
def test_underscore_to_hyphen_conversion(self):
243+
"""Test that underscores are converted to hyphens in CLI."""
244+
item = ParameterSweepItem.from_record({"max_tokens": 100})
245+
cmd = item.apply_to_cmd(["vllm", "serve"])
246+
assert "--max-tokens" in cmd
247+
248+
def test_nested_key_preserves_suffix(self):
249+
"""Test that nested keys preserve the suffix format."""
250+
# The suffix after the dot should preserve underscores
251+
item = ParameterSweepItem.from_record(
252+
{"compilation_config.some_nested_param": "value"}
253+
)
254+
cmd = item.apply_to_cmd(["vllm", "serve"])
255+
# The prefix (compilation_config) gets converted to hyphens,
256+
# but the suffix (some_nested_param) is preserved
257+
assert any("compilation-config.some_nested_param" in arg for arg in cmd)

0 commit comments

Comments
 (0)