Skip to content

Commit 6f1f27b

Browse files
committed
Fix LLaVA-NeXT feature size calculation (for real)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent 996357e commit 6f1f27b

File tree

4 files changed: 28 additions and 28 deletions.

tests/models/decoder_only/vision_language/processing/test_llava_next.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,8 @@ def processor_for_llava_next():
1717

1818
@pytest.mark.parametrize("model_id", ["llava-hf/llava-v1.6-mistral-7b-hf"])
1919
@pytest.mark.parametrize("image_size", [(1669, 2560), (2560, 1669), (183, 488),
20-
(488, 183), (198, 176), (176, 198)])
20+
(488, 183), (198, 176), (176, 198),
21+
(161, 184), (184, 161)])
2122
@pytest.mark.parametrize("num_imgs", [1, 2])
2223
def test_processor_prompt_replacements(
2324
processor_for_llava_next,

tests/models/decoder_only/vision_language/processing/test_llava_onevision.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,8 @@ def processor_for_llava_onevision():
1818
@pytest.mark.parametrize("model_id",
1919
["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"])
2020
@pytest.mark.parametrize("image_size", [(1669, 2560), (2560, 1669), (183, 488),
21-
(488, 183), (198, 176), (176, 198)])
21+
(488, 183), (198, 176), (176, 198),
22+
(161, 184), (184, 161)])
2223
@pytest.mark.parametrize("num_imgs", [1, 2])
2324
def test_processor_prompt_replacements(
2425
processor_for_llava_onevision,

vllm/model_executor/models/llava_next.py

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -121,30 +121,29 @@ def _get_num_unpadded_features(
121121
num_patch_height: int,
122122
num_patch_width: int,
123123
) -> tuple[int, int]:
124-
current_height = npatches * num_patch_height
125-
current_width = npatches * num_patch_width
126-
127124
# NOTE: Use float32 to remain consistent with HF output
128-
original_aspect_ratio = np.array(original_width / original_height,
129-
dtype=np.float32)
130-
current_aspect_ratio = np.array(current_width / current_height,
131-
dtype=np.float32)
125+
current_height = np.float32(npatches * num_patch_height)
126+
current_width = np.float32(npatches * num_patch_width)
127+
128+
original_width = np.float32(original_width) # type: ignore
129+
original_height = np.float32(original_height) # type: ignore
130+
131+
original_aspect_ratio = original_width / original_height
132+
current_aspect_ratio = current_width / current_height
132133

133134
if original_aspect_ratio > current_aspect_ratio:
134-
scale_factor = np.array(current_width / original_width,
135-
dtype=np.float32)
135+
scale_factor = current_width / original_width
136136
new_height = int(original_height * scale_factor)
137137
padding = (current_height - new_height) // 2
138138
current_height -= 2 * padding
139139
else:
140-
scale_factor = np.array(current_height / original_height,
141-
dtype=np.float32)
140+
scale_factor = current_height / original_height
142141
new_width = int(original_width * scale_factor)
143142
padding = (current_width - new_width) // 2
144143
current_width -= 2 * padding
145144

146-
unpadded_features = current_height * current_width
147-
newline_features = current_height
145+
unpadded_features = int(current_height * current_width)
146+
newline_features = int(current_height)
148147

149148
return (unpadded_features, newline_features)
150149

vllm/model_executor/models/llava_onevision.py

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -104,30 +104,29 @@ def _get_num_unpadded_features(
104104
num_patch_height: int,
105105
num_patch_width: int,
106106
) -> tuple[int, int]:
107-
current_height = npatches * num_patch_height
108-
current_width = npatches * num_patch_width
109-
110107
# NOTE: Use float32 to remain consistent with HF output
111-
original_aspect_ratio = np.array(original_width / original_height,
112-
dtype=np.float32)
113-
current_aspect_ratio = np.array(current_width / current_height,
114-
dtype=np.float32)
108+
current_height = np.float32(npatches * num_patch_height)
109+
current_width = np.float32(npatches * num_patch_width)
110+
111+
original_width = np.float32(original_width) # type: ignore
112+
original_height = np.float32(original_height) # type: ignore
113+
114+
original_aspect_ratio = original_width / original_height
115+
current_aspect_ratio = current_width / current_height
115116

116117
if original_aspect_ratio > current_aspect_ratio:
117-
scale_factor = np.array(current_width / original_width,
118-
dtype=np.float32)
118+
scale_factor = current_width / original_width
119119
new_height = int(original_height * scale_factor)
120120
padding = (current_height - new_height) // 2
121121
current_height -= 2 * padding
122122
else:
123-
scale_factor = np.array(current_height / original_height,
124-
dtype=np.float32)
123+
scale_factor = current_height / original_height
125124
new_width = int(original_width * scale_factor)
126125
padding = (current_width - new_width) // 2
127126
current_width -= 2 * padding
128127

129-
unpadded_features = current_height * current_width
130-
newline_features = current_height
128+
unpadded_features = int(current_height * current_width)
129+
newline_features = int(current_height)
131130

132131
ratio = math.sqrt(current_height * current_width / (9 * npatches**2))
133132
if ratio > 1.1:

0 commit comments

Comments
 (0)