Skip to content

Commit 6c4270f

Browse files
committed
update batching test
Signed-off-by: Isotr0py <[email protected]>
1 parent: b3bff2a · commit: 6c4270f

File tree

1 file changed

+34
-18
lines changed

1 file changed

+34
-18
lines changed

tests/models/multimodal/generation/test_multimodal_gguf.py

Lines changed: 34 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414

1515
from tests.quantization.utils import is_quant_method_supported
1616
from vllm.assets.image import ImageAsset
17+
from vllm.multimodal.image import rescale_image_size
1718
from vllm.utils.torch_utils import set_default_torch_num_threads
1819

1920
from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner
@@ -96,6 +97,14 @@ def run_multimodal_gguf_test(
9697
):
9798
# Load images at runtime (inside subprocess) to avoid pickle issues
9899
images = [ImageAsset(name).pil_image for name in model.image_names]
100+
size_factors = [0.25, 0.5, 1.0]
101+
inputs_per_image = [
102+
(
103+
[prompt for _ in size_factors],
104+
[rescale_image_size(image, factor) for factor in size_factors],
105+
)
106+
for image, prompt in zip(images, _GEMMA3_PROMPTS)
107+
]
99108

100109
# NOTE: Run vLLM first to avoid CUDA init issues with multiprocessing fork.
101110
# Run GGUF model via vLLM.
@@ -110,33 +119,40 @@ def run_multimodal_gguf_test(
110119
mm_processor_kwargs=model.mm_processor_kwargs,
111120
) as gguf_model,
112121
):
113-
gguf_outputs = gguf_model.generate_greedy_logprobs(
114-
prompts=model.prompt,
115-
max_tokens=max_tokens,
116-
num_logprobs=num_logprobs,
117-
images=images,
118-
)
122+
gguf_outputs_per_case = [
123+
gguf_model.generate_greedy_logprobs(
124+
prompts,
125+
max_tokens,
126+
num_logprobs=num_logprobs,
127+
images=images,
128+
)
129+
for prompts, images in inputs_per_image
130+
]
119131

120132
# Run HF model via native HuggingFace (HfRunner).
121133
with hf_runner(
122134
model.original_model,
123135
dtype=dtype,
124136
auto_cls=AutoModelForImageTextToText,
125137
) as hf_model:
126-
hf_outputs = hf_model.generate_greedy_logprobs_limit(
127-
prompts=model.prompt,
128-
max_tokens=max_tokens,
129-
num_logprobs=num_logprobs,
130-
images=images,
138+
hf_outputs_per_case = [
139+
hf_model.generate_greedy_logprobs_limit(
140+
prompts,
141+
max_tokens,
142+
num_logprobs=num_logprobs,
143+
images=images,
144+
)
145+
for prompts, images in inputs_per_image
146+
]
147+
148+
for hf_outputs, gguf_outputs in zip(hf_outputs_per_case, gguf_outputs_per_case):
149+
check_logprobs_close(
150+
outputs_0_lst=hf_outputs,
151+
outputs_1_lst=gguf_outputs,
152+
name_0="hf",
153+
name_1="gguf",
131154
)
132155

133-
check_logprobs_close(
134-
outputs_0_lst=hf_outputs,
135-
outputs_1_lst=gguf_outputs,
136-
name_0="hf",
137-
name_1="gguf",
138-
)
139-
140156

141157
@create_new_process_for_each_test()
142158
@pytest.mark.skipif(

0 commit comments

Comments
 (0)