From 41dcf4dd8e8b1c99a262e6f42b9bf11660f6dcbd Mon Sep 17 00:00:00 2001
From: DarkLight1337
Date: Mon, 17 Nov 2025 17:36:36 +0000
Subject: [PATCH 1/2] [Bugfix] Fix wrong CLI defaults for dynamic `SchedulerConfig` fields

Signed-off-by: DarkLight1337
---
 vllm/engine/arg_utils.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index d011dfdbfbb2..ab6e5e594c23 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1046,10 +1046,18 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             description=SchedulerConfig.__doc__,
         )
         scheduler_group.add_argument(
-            "--max-num-batched-tokens", **scheduler_kwargs["max_num_batched_tokens"]
+            "--max-num-batched-tokens",
+            **{
+                **scheduler_kwargs["max_num_batched_tokens"],
+                "default": None,
+            },
         )
         scheduler_group.add_argument(
-            "--max-num-seqs", **scheduler_kwargs["max_num_seqs"]
+            "--max-num-seqs",
+            **{
+                **scheduler_kwargs["max_num_seqs"],
+                "default": None,
+            },
         )
         scheduler_group.add_argument(
             "--max-num-partial-prefills", **scheduler_kwargs["max_num_partial_prefills"]
@@ -1071,7 +1079,11 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             "--scheduling-policy", **scheduler_kwargs["policy"]
         )
         scheduler_group.add_argument(
-            "--enable-chunked-prefill", **scheduler_kwargs["enable_chunked_prefill"]
+            "--enable-chunked-prefill",
+            **{
+                **scheduler_kwargs["enable_chunked_prefill"],
+                "default": None,
+            },
         )
         scheduler_group.add_argument(
             "--disable-chunked-mm-input", **scheduler_kwargs["disable_chunked_mm_input"]

From 5bcc68603f40ef664cfcc3b40b2123f032a76f81 Mon Sep 17 00:00:00 2001
From: DarkLight1337
Date: Tue, 18 Nov 2025 03:11:19 +0000
Subject: [PATCH 2/2] Fix

Signed-off-by: DarkLight1337
---
 tests/entrypoints/openai/test_enable_force_include_usage.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/entrypoints/openai/test_enable_force_include_usage.py b/tests/entrypoints/openai/test_enable_force_include_usage.py
index 3ddf2308eb1d..9d527c45c1fa 100644
--- a/tests/entrypoints/openai/test_enable_force_include_usage.py
+++ b/tests/entrypoints/openai/test_enable_force_include_usage.py
@@ -17,7 +17,7 @@ def chat_server_with_force_include_usage(request):  # noqa: F811
         "128",
         "--enforce-eager",
         "--max-num-seqs",
-        "1",
+        "4",
         "--enable-force-include-usage",
         "--port",
         "55857",
@@ -78,7 +78,7 @@ def transcription_server_with_force_include_usage():
         "--dtype",
         "bfloat16",
         "--max-num-seqs",
-        "1",
+        "4",
         "--enforce-eager",
         "--enable-force-include-usage",
         "--gpu-memory-utilization",
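
Note on the override pattern used in PATCH 1/2: a dict literal built with ** unpacking lets a later key win over an earlier one, so merging in "default": None makes argparse report "not set on the CLI" for these fields, leaving the actual value to be resolved dynamically elsewhere. Below is a minimal standalone sketch of that pattern using plain argparse; the scheduler_kwargs entry and its values are illustrative stand-ins, not vLLM's real configuration.

import argparse

# Hypothetical stand-in for the scheduler_kwargs dict referenced in the patch;
# the type/default/help values here are illustrative only.
scheduler_kwargs = {
    "max_num_seqs": {
        "type": int,
        "default": 256,
        "help": "Maximum number of sequences per iteration.",
    },
}

parser = argparse.ArgumentParser()
parser.add_argument(
    "--max-num-seqs",
    **{
        **scheduler_kwargs["max_num_seqs"],
        "default": None,  # later key wins: the CLI default becomes None ("unset")
    },
)

args = parser.parse_args([])
assert args.max_num_seqs is None  # unset on the CLI, to be resolved dynamically later
args = parser.parse_args(["--max-num-seqs", "8"])
assert args.max_num_seqs == 8  # explicit values still parse with the original type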