From 0f4362f132ead8f5644f90763f6efa1083ca699c Mon Sep 17 00:00:00 2001
From: Junpu Fan
Date: Mon, 27 Oct 2025 16:18:47 +0000
Subject: [PATCH 1/2] validate priority arg in frontend LLM class before adding requests

Signed-off-by: Junpu Fan
---
 tests/entrypoints/llm/test_generate.py | 16 ++++++++++++++++
 vllm/entrypoints/llm.py                |  2 ++
 2 files changed, 18 insertions(+)

diff --git a/tests/entrypoints/llm/test_generate.py b/tests/entrypoints/llm/test_generate.py
index e9993fd84061..465a68a67bca 100644
--- a/tests/entrypoints/llm/test_generate.py
+++ b/tests/entrypoints/llm/test_generate.py
@@ -71,6 +71,22 @@ def test_multiple_sampling_params(llm: LLM):
     assert len(PROMPTS) == len(outputs)
 
 
+def test_multiple_priority(llm: LLM):
+    # Generate works when priority is None
+    outputs = llm.generate(PROMPTS, sampling_params=None, priority=None)
+    assert len(PROMPTS) == len(outputs)
+
+    # Generate works when the length of priority matches len(PROMPTS)
+    outputs = llm.generate(PROMPTS, sampling_params=None, priority=[0] * len(PROMPTS))
+    assert len(PROMPTS) == len(outputs)
+
+    # Exception raised if the length of priority does not match the length of prompts
+    with pytest.raises(ValueError):
+        outputs = llm.generate(
+            PROMPTS, sampling_params=None, priority=[0] * (len(PROMPTS) - 1)
+        )
+
+
 def test_max_model_len():
     max_model_len = 20
     llm = LLM(
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index c15b70a06809..8e3e51922ee7 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -1564,6 +1564,8 @@ def _validate_and_add_requests(
             raise ValueError(
                 "The lengths of prompts and lora_request must be the same."
             )
+        if priority is not None and len(priority) != num_requests:
+            raise ValueError("The lengths of prompts and priority must be the same.")
 
         for sp in params if isinstance(params, Sequence) else (params,):
             if isinstance(sp, SamplingParams):

From f939ca0989c3d73b71a4ed55bd7ba030d7969730 Mon Sep 17 00:00:00 2001
From: Junpu Fan
Date: Mon, 27 Oct 2025 17:25:49 +0000
Subject: [PATCH 2/2] update error message and test case

Signed-off-by: Junpu Fan
---
 tests/entrypoints/llm/test_generate.py | 4 ++++
 vllm/entrypoints/llm.py                | 6 +++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests/entrypoints/llm/test_generate.py b/tests/entrypoints/llm/test_generate.py
index 465a68a67bca..34465b7d2708 100644
--- a/tests/entrypoints/llm/test_generate.py
+++ b/tests/entrypoints/llm/test_generate.py
@@ -86,6 +86,10 @@ def test_multiple_priority(llm: LLM):
             PROMPTS, sampling_params=None, priority=[0] * (len(PROMPTS) - 1)
         )
 
+    # Exception raised if the priority list is empty
+    with pytest.raises(ValueError):
+        outputs = llm.generate(PROMPTS, sampling_params=None, priority=[])
+
 
 def test_max_model_len():
     max_model_len = 20
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index 8e3e51922ee7..6a9cf554a718 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -1565,7 +1565,11 @@ def _validate_and_add_requests(
                 "The lengths of prompts and lora_request must be the same."
             )
         if priority is not None and len(priority) != num_requests:
-            raise ValueError("The lengths of prompts and priority must be the same.")
+            raise ValueError(
+                "The lengths of prompts "
+                f"({num_requests}) and priority ({len(priority)}) "
+                "must be the same."
+            )
 
         for sp in params if isinstance(params, Sequence) else (params,):
             if isinstance(sp, SamplingParams):
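
For context, a minimal sketch (not part of the patch) of the behavior the new
frontend check enforces; the model name and prompts here are illustrative, and
how priorities are actually scheduled still depends on engine configuration:

    import pytest
    from vllm import LLM

    llm = LLM(model="facebook/opt-125m")  # illustrative small model
    prompts = ["Hello, my name is", "The capital of France is"]

    # One priority per prompt: the requests are accepted.
    outputs = llm.generate(prompts, priority=[0, 1])
    assert len(outputs) == len(prompts)

    # A length mismatch (including an empty list) now fails fast in the
    # frontend with the enriched message, instead of surfacing later in
    # the engine core.
    with pytest.raises(ValueError, match="must be the same"):
        llm.generate(prompts, priority=[0])

Validating in _validate_and_add_requests keeps the check alongside the existing
prompts/lora_request length check, so all per-request argument mismatches are
rejected before any request is added to the engine.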