
Commit f89d0e1

[Misc] Continue refactoring model tests (#17573)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent: b4003d1

6 files changed (+20, -16 lines)


examples/offline_inference/qwen2_5_omni/only_thinker.py

Lines changed: 2 additions & 3 deletions
```diff
@@ -47,8 +47,7 @@ def get_mixed_modalities_query() -> QueryResult:
             "image":
             ImageAsset("cherry_blossom").pil_image.convert("RGB"),
             "video":
-            VideoAsset(name="sample_demo_1.mp4",
-                       num_frames=16).np_ndarrays,
+            VideoAsset(name="sample_demo_1", num_frames=16).np_ndarrays,
         },
     },
     limit_mm_per_prompt={
@@ -66,7 +65,7 @@ def get_use_audio_in_video_query() -> QueryResult:
         "<|im_start|>user\n<|vision_bos|><|VIDEO|><|vision_eos|>"
         f"{question}<|im_end|>\n"
         f"<|im_start|>assistant\n")
-    asset = VideoAsset(name="sample_demo_1.mp4", num_frames=16)
+    asset = VideoAsset(name="sample_demo_1", num_frames=16)
     audio = asset.get_audio(sampling_rate=16000)
     assert not envs.VLLM_USE_V1, ("V1 does not support use_audio_in_video. "
                                   "Please launch this example with "
```

examples/offline_inference/vision_language.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -1109,7 +1109,7 @@ def get_multi_modal_input(args):
 
     if args.modality == "video":
         # Input video and question
-        video = VideoAsset(name="sample_demo_1.mp4",
+        video = VideoAsset(name="sample_demo_1",
                            num_frames=args.num_frames).np_ndarrays
         vid_questions = ["Why is this video funny?"]
 
```
tests/conftest.py

Lines changed: 9 additions & 1 deletion
```diff
@@ -97,13 +97,18 @@ class _VideoAssets(_VideoAssetsBase):
 
     def __init__(self) -> None:
         super().__init__([
-            VideoAsset("sample_demo_1.mp4"),
+            VideoAsset("sample_demo_1"),
         ])
 
     def prompts(self, prompts: _VideoAssetPrompts) -> list[str]:
         return [prompts["sample_demo_1"]]
 
 
+class _AudioAssetPrompts(TypedDict):
+    mary_had_lamb: str
+    winning_call: str
+
+
 class _AudioAssetsBase(UserList[AudioAsset]):
     pass
 
@@ -116,6 +121,9 @@ def __init__(self) -> None:
             AudioAsset("winning_call"),
         ])
 
+    def prompts(self, prompts: _AudioAssetPrompts) -> list[str]:
+        return [prompts["mary_had_lamb"], prompts["winning_call"]]
+
 
 IMAGE_ASSETS = _ImageAssets()
 """Singleton instance of :class:`_ImageAssets`."""
```

tests/models/multimodal/generation/test_interleaved.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -29,7 +29,7 @@ def test_models(vllm_runner, model, dtype: str, max_tokens: int) -> None:
     image_cherry = ImageAsset("cherry_blossom").pil_image.convert("RGB")
     image_stop = ImageAsset("stop_sign").pil_image.convert("RGB")
     images = [image_cherry, image_stop]
-    video = VideoAsset(name="sample_demo_1.mp4", num_frames=16).np_ndarrays
+    video = VideoAsset(name="sample_demo_1", num_frames=16).np_ndarrays
 
     inputs = [
         (
```

tests/models/multimodal/generation/test_intern_vit.py renamed to tests/models/multimodal/pooling/test_intern_vit.py

Lines changed: 3 additions & 6 deletions
```diff
@@ -1,13 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
-
-from typing import Optional
-
 import pytest
 import torch
 import torch.nn as nn
 from huggingface_hub import snapshot_download
 from transformers import AutoConfig, AutoModel, CLIPImageProcessor
 
+from vllm.distributed import cleanup_dist_env_and_memory
+
 from ....conftest import _ImageAssets
 
 # we use snapshot_download to prevent conflicts between
@@ -20,7 +19,6 @@ def run_intern_vit_test(
     model_id: str,
     *,
     dtype: str,
-    distributed_executor_backend: Optional[str] = None,
 ):
     model = snapshot_download(model_id, allow_patterns=DOWNLOAD_PATTERN)
 
@@ -43,7 +41,6 @@ def run_intern_vit_test(
         for pixel_value in pixel_values
     ]
 
-    from vllm.distributed import cleanup_dist_env_and_memory
     from vllm.model_executor.models.intern_vit import InternVisionModel
     vllm_model = InternVisionModel(config)
     vllm_model.load_weights(hf_model.state_dict().items())
@@ -71,7 +68,7 @@ def run_intern_vit_test(
 ])
 @pytest.mark.parametrize("dtype", [torch.half])
 @torch.inference_mode()
-def test_models(dist_init, image_assets, model_id, dtype: str) -> None:
+def test_models(image_assets, model_id, dtype: str) -> None:
     run_intern_vit_test(
         image_assets,
         model_id,
```
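With `cleanup_dist_env_and_memory` imported at module level and the unused `distributed_executor_backend` parameter removed, the test no longer needs the `dist_init` fixture. A condensed, hypothetical sketch of the resulting pattern follows (the real test also runs the HuggingFace reference model and compares outputs; `hf_state_dict` stands in for that setup):

```python
from vllm.distributed import cleanup_dist_env_and_memory
from vllm.model_executor.models.intern_vit import InternVisionModel


def build_and_release(config, hf_state_dict) -> None:
    # Build the vLLM vision tower and load the HF checkpoint into it.
    vllm_model = InternVisionModel(config)
    vllm_model.load_weights(hf_state_dict.items())
    del vllm_model
    # Tear down distributed state and free GPU memory between
    # parametrized runs instead of relying on a dist_init fixture.
    cleanup_dist_env_and_memory()
```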

vllm/assets/video.py

Lines changed: 4 additions & 4 deletions
```diff
@@ -78,18 +78,18 @@ def video_to_pil_images_list(path: str,
 
 @dataclass(frozen=True)
 class VideoAsset:
-    name: Literal["sample_demo_1.mp4"]
+    name: Literal["sample_demo_1"]
     num_frames: int = -1
 
     @property
     def pil_images(self) -> list[Image.Image]:
-        video_path = download_video_asset(self.name)
+        video_path = download_video_asset(self.name + ".mp4")
         ret = video_to_pil_images_list(video_path, self.num_frames)
         return ret
 
     @property
     def np_ndarrays(self) -> npt.NDArray:
-        video_path = download_video_asset(self.name)
+        video_path = download_video_asset(self.name + ".mp4")
         ret = video_to_ndarrays(video_path, self.num_frames)
         return ret
 
@@ -99,5 +99,5 @@ def get_audio(self, sampling_rate: Optional[float] = None) -> npt.NDArray:
 
         See also: examples/offline_inference/qwen2_5_omni/only_thinker.py
         """
-        video_path = download_video_asset(self.name)
+        video_path = download_video_asset(self.name + ".mp4")
         return librosa.load(video_path, sr=sampling_rate)[0]
```
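The net effect of this file's change is that callers pass the bare asset name and the loader appends the ".mp4" extension itself. A minimal usage sketch of the updated API, using only the attributes shown in the diff above:

```python
# Minimal sketch: asset names no longer carry a file extension;
# ".mp4" is appended internally before the asset is downloaded.
from vllm.assets.video import VideoAsset

asset = VideoAsset(name="sample_demo_1", num_frames=16)
frames = asset.np_ndarrays                    # decoded frames as a NumPy array
images = asset.pil_images                     # the same frames as PIL images
audio = asset.get_audio(sampling_rate=16000)  # audio track decoded via librosa
```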
