
Commit 11d54f2

whx-sjtu and hw_whx authored and committed
[BugFix][Core] Fix a bug running multi-modal with ascend_scheduler (vllm-project#3675)
This PR fixes a bug that broke multi-modal models when running with AscendScheduler. The bug was introduced by PR vllm-project#2372, which reused vLLM's parameter names but gave them different default values. The fix changes the default values of these two parameters to align with vLLM.

- vLLM version: v0.11.0rc3
- vLLM main: vllm-project/vllm@17c540a

Signed-off-by: hw_whx <[email protected]>
Co-authored-by: hw_whx <[email protected]>
Signed-off-by: nsdie <[email protected]>
1 parent 84b4386 commit 11d54f2
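
For context: vLLM's SchedulerConfig defaults max_long_partial_prefills to 1, while the Ascend override had defaulted the same field to MAX_INT, so scheduler logic tuned to the upstream default silently behaved differently on Ascend. A minimal sketch of that failure mode, using an illustrative MAX_INT stand-in and invented class names rather than the real definitions:

from dataclasses import dataclass

MAX_INT = 2**31 - 1  # illustrative stand-in for vllm_ascend's MAX_INT constant


@dataclass
class UpstreamSchedulerConfig:
    # vLLM's default: at most one long partial prefill scheduled at a time.
    max_long_partial_prefills: int = 1


@dataclass
class AscendOverrideBeforeFix(UpstreamSchedulerConfig):
    # Pre-fix Ascend default: same field name, different value, so code
    # written against the upstream default got an unexpected value here.
    max_long_partial_prefills: int = MAX_INT


assert UpstreamSchedulerConfig().max_long_partial_prefills == 1
assert AscendOverrideBeforeFix().max_long_partial_prefills == MAX_INT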

File tree

2 files changed: +36 -4 lines changed


tests/e2e/singlecard/test_vlm.py

Lines changed: 32 additions & 0 deletions
@@ -55,6 +55,38 @@ def test_multimodal_vl(prompt_template):
         assert output_str, "Generated output should not be empty."
 
 
+def test_multimodal_ascend_scheduler(prompt_template):
+    image = ImageAsset("cherry_blossom") \
+        .pil_image.convert("RGB")
+    img_questions = [
+        "What is the content of this image?",
+        "Describe the content of this image in detail.",
+        "What's in the image?",
+        "Where is this image taken?",
+    ]
+    images = [image] * len(img_questions)
+    prompts = prompt_template(img_questions)
+    with VllmRunner("Qwen/Qwen2.5-VL-3B-Instruct",
+                    max_model_len=4096,
+                    additional_config={
+                        'ascend_scheduler_config': {
+                            'enabled': True,
+                        },
+                    },
+                    mm_processor_kwargs={
+                        "min_pixels": 28 * 28,
+                        "max_pixels": 1280 * 28 * 28,
+                        "fps": 1,
+                    },
+                    enforce_eager=True) as vllm_model:
+        outputs = vllm_model.generate_greedy(prompts=prompts,
+                                             images=images,
+                                             max_tokens=64)
+    assert len(outputs) == len(prompts)
+    for _, output_str in outputs:
+        assert output_str, "Generated output should not be empty."
+
+
 def test_multimodal_audio():
     audio_prompt = "".join([
         f"Audio {idx+1}: <|audio_bos|><|AUDIO|><|audio_eos|>\n"

vllm_ascend/core/schedule_config.py

Lines changed: 4 additions & 4 deletions
@@ -26,7 +26,7 @@
 @dataclass
 class AscendSchedulerConfig(SchedulerConfig):
     enable_chunked_prefill: bool = False
-    max_long_partial_prefills: int = MAX_INT
+    max_long_partial_prefills: int = 1
     long_prefill_token_threshold: int = MAX_INT
     policy: str = "fcfs"
     scheduler_cls: Union[str, Type[object]] = (
@@ -73,9 +73,9 @@ def __post_init__(self, *args) -> None:
                 "max_num_batched_tokens and makes vLLM reject longer "
                 "sequences. Please increase max_num_batched_tokens or "
                 "decrease max_model_len.")
-        # concurrent partial prefills. Default is inf
+        # concurrent partial prefills. Default is 1 meaning not enabled.
         if self.max_long_partial_prefills is None:
-            self.max_long_partial_prefills = MAX_INT
+            self.max_long_partial_prefills = 1
             self.long_prefill_token_threshold = MAX_INT
 
         if self.long_prefill_token_threshold is None or \
@@ -105,4 +105,4 @@ def __post_init__(self, *args) -> None:
         if getattr(self, "scheduler_delay_factor", 0) > 0:
             raise NotImplementedError(
                 "currently AscendScheduler doesn't support scheduler_delay_factor."
-            )
+            )
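
One way to keep this class of drift from regressing is to assert that field names shared with upstream keep upstream's defaults; a hypothetical guard test (invented name, not in this commit), sketched under the assumption that both configs remain plain dataclasses:

from dataclasses import fields

from vllm.config import SchedulerConfig
from vllm_ascend.core.schedule_config import AscendSchedulerConfig


def test_defaults_align_with_vllm():
    # Hypothetical guard: fields the Ascend subclass redefines under the same
    # name should keep vLLM's default unless the divergence is deliberate
    # (enable_chunked_prefill is one deliberate divergence).
    upstream = {f.name: f.default for f in fields(SchedulerConfig)}
    ascend = {f.name: f.default for f in fields(AscendSchedulerConfig)}
    assert (ascend["max_long_partial_prefills"] ==
            upstream["max_long_partial_prefills"])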
