Skip to content

Commit 40e7f44

Browse files
kylesayrs authored and DefTruth committed
[Bugfix] Limit profiling run sequence length by max_model_len (vllm-project#14785)
Signed-off-by: Kyle Sayers <[email protected]>
Signed-off-by: DefTruth <[email protected]>
1 parent 32ac8d7 commit 40e7f44

File tree

5 files changed

+9
-0
lines changed

5 files changed

+9
-0
lines changed

vllm/inputs/registry.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,11 @@ def dummy_data_for_profiling(
330330
from vllm.multimodal import MultiModalKwargs
331331
from vllm.multimodal.profiling import MultiModalProfiler
332332

333+
if seq_len > model_config.max_model_len:
334+
raise AssertionError(
335+
f"Profiling attempted with sequence length ({seq_len}) "
336+
f"greater than model length ({model_config.max_model_len})")
337+
333338
if mm_registry.has_processor(model_config):
334339
tokenizer = cached_tokenizer_from_config(model_config)
335340
processor = mm_registry.create_processor(model_config,

vllm/worker/enc_dec_model_runner.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ def profile_run(self) -> None:
281281
for group_id in range(max_num_seqs):
282282
seq_len = (max_num_batched_tokens // max_num_seqs +
283283
(group_id < max_num_batched_tokens % max_num_seqs))
284+
seq_len = min(seq_len, self.model_config.max_model_len)
284285
batch_size += seq_len
285286

286287
decoder_dummy_data = self.input_registry \

vllm/worker/model_runner.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1302,6 +1302,7 @@ def _dummy_run(self,
13021302
for group_id in range(max_num_seqs):
13031303
seq_len = (max_num_batched_tokens // max_num_seqs +
13041304
(group_id < max_num_batched_tokens % max_num_seqs))
1305+
seq_len = min(seq_len, self.model_config.max_model_len)
13051306
batch_size += seq_len
13061307

13071308
dummy_data = self.input_registry \

vllm/worker/openvino_model_runner.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ def _prepare_model_input(
148148
seq_len = min(
149149
seq_data.get_len(),
150150
computed_len + seq_group_metadata.token_chunk_size,
151+
self.model_config.max_model_len,
151152
)
152153
if is_prompt:
153154
tokens = seq_data.get_token_ids()[computed_len:seq_len]

vllm/worker/xpu_model_runner.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,7 @@ def profile_run(self) -> None:
466466
for group_id in range(max_num_seqs):
467467
seq_len = (max_num_batched_tokens // max_num_seqs +
468468
(group_id < max_num_batched_tokens % max_num_seqs))
469+
seq_len = min(seq_len, self.model_config.max_model_len)
469470
batch_size += seq_len
470471

471472
dummy_data = self.input_registry \

0 commit comments

Comments (0)