From 2fe9eeda6c6edea8525a0ea26042c38a79d5081c Mon Sep 17 00:00:00 2001
From: "rshaw@neuralmagic.com"
Date: Tue, 11 Mar 2025 14:19:48 +0000
Subject: [PATCH 1/6] updated

Signed-off-by: rshaw@neuralmagic.com
---
 .buildkite/test-pipeline.yaml                            | 1 +
 tests/v1/entrypoints/llm/test_struct_output_generate.py  | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 2af76cb24dd1..c2c84d65e267 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -207,6 +207,7 @@ steps:
     - VLLM_USE_V1=1 pytest -v -s v1/structured_output
     - VLLM_USE_V1=1 pytest -v -s v1/test_stats.py
     - VLLM_USE_V1=1 pytest -v -s v1/test_utils.py
+    - VLLM_WORKER_MULTIPROC_METHOD="spawn" VLLM_USE_V1 pytest -v -x tests/v1/entrypoints
     # TODO: accuracy does not match, whether setting
     # VLLM_USE_FLASHINFER_SAMPLER or not on H100.
     - VLLM_USE_V1=1 pytest -v -s v1/e2e
diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py
index 871739bcf164..52ab4cf78d9a 100644
--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -220,7 +220,7 @@ def test_guided_regex(monkeypatch, sample_regex, guided_decoding_backend: str):
                                          regex=sample_regex,
                                          backend=guided_decoding_backend))
     with pytest.raises(ValueError,
-                       match="Regex guided decoding is not supported."):
+                       match="Regex structured output is not supported."):
         llm.generate(prompts=[
             f"Give an example IPv4 address with this regex: {sample_regex}"
         ] * 2,

From 560563c16c241b5a2f519bdb60a278059a487f57 Mon Sep 17 00:00:00 2001
From: "rshaw@neuralmagic.com"
Date: Tue, 11 Mar 2025 14:53:11 +0000
Subject: [PATCH 2/6] updated

Signed-off-by: rshaw@neuralmagic.com
---
 .buildkite/test-pipeline.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index c2c84d65e267..277694ebd40d 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -207,7 +207,7 @@ steps:
     - VLLM_USE_V1=1 pytest -v -s v1/structured_output
     - VLLM_USE_V1=1 pytest -v -s v1/test_stats.py
     - VLLM_USE_V1=1 pytest -v -s v1/test_utils.py
-    - VLLM_WORKER_MULTIPROC_METHOD="spawn" VLLM_USE_V1 pytest -v -x tests/v1/entrypoints
+    - VLLM_WORKER_MULTIPROC_METHOD=spawn VLLM_USE_V1 pytest -v -x tests/v1/entrypoints
     # TODO: accuracy does not match, whether setting
     # VLLM_USE_FLASHINFER_SAMPLER or not on H100.
     - VLLM_USE_V1=1 pytest -v -s v1/e2e

From a70b25b5273ddf0ebc4eeef852a821e58dedbf4c Mon Sep 17 00:00:00 2001
From: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
Date: Tue, 11 Mar 2025 11:32:28 -0400
Subject: [PATCH 3/6] Update test-pipeline.yaml

---
 .buildkite/test-pipeline.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 277694ebd40d..13fe3e5fed91 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -207,7 +207,7 @@ steps:
     - VLLM_USE_V1=1 pytest -v -s v1/structured_output
     - VLLM_USE_V1=1 pytest -v -s v1/test_stats.py
     - VLLM_USE_V1=1 pytest -v -s v1/test_utils.py
-    - VLLM_WORKER_MULTIPROC_METHOD=spawn VLLM_USE_V1 pytest -v -x tests/v1/entrypoints
+    - VLLM_USE_V1 pytest -v -x tests/v1/entrypoints
     # TODO: accuracy does not match, whether setting
     # VLLM_USE_FLASHINFER_SAMPLER or not on H100.
     - VLLM_USE_V1=1 pytest -v -s v1/e2e

From 73d5cb1e0cbc5029a6f4933ff7e08c6d67896f0b Mon Sep 17 00:00:00 2001
From: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
Date: Wed, 12 Mar 2025 07:57:22 -0400
Subject: [PATCH 4/6] Update test-pipeline.yaml

---
 .buildkite/test-pipeline.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 13fe3e5fed91..798409262092 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -207,7 +207,7 @@ steps:
     - VLLM_USE_V1=1 pytest -v -s v1/structured_output
     - VLLM_USE_V1=1 pytest -v -s v1/test_stats.py
     - VLLM_USE_V1=1 pytest -v -s v1/test_utils.py
-    - VLLM_USE_V1 pytest -v -x tests/v1/entrypoints
+    - VLLM_USE_V1=1 pytest -v -x tests/v1/entrypoints
     # TODO: accuracy does not match, whether setting
     # VLLM_USE_FLASHINFER_SAMPLER or not on H100.
     - VLLM_USE_V1=1 pytest -v -s v1/e2e

From e049d17da584c5c88feab3af3e4564d4dfcd8895 Mon Sep 17 00:00:00 2001
From: DarkLight1337
Date: Fri, 14 Mar 2025 16:24:07 +0000
Subject: [PATCH 5/6] Apply suggestion

Signed-off-by: DarkLight1337
---
 .buildkite/test-pipeline.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 798409262092..4b13d4d7b165 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -207,7 +207,7 @@ steps:
     - VLLM_USE_V1=1 pytest -v -s v1/structured_output
     - VLLM_USE_V1=1 pytest -v -s v1/test_stats.py
     - VLLM_USE_V1=1 pytest -v -s v1/test_utils.py
-    - VLLM_USE_V1=1 pytest -v -x tests/v1/entrypoints
+    - VLLM_USE_V1=1 pytest -v -s v1/entrypoints
     # TODO: accuracy does not match, whether setting
     # VLLM_USE_FLASHINFER_SAMPLER or not on H100.
     - VLLM_USE_V1=1 pytest -v -s v1/e2e

From 5e50b8184c340b9c66ac73dffa2bf97f18013248 Mon Sep 17 00:00:00 2001
From: DarkLight1337
Date: Fri, 14 Mar 2025 16:29:19 +0000
Subject: [PATCH 6/6] Don't use fixture

Signed-off-by: DarkLight1337
---
 .../llm/test_struct_output_generate.py       | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py
index b99fb6a77829..5a330ff3a8bb 100644
--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -14,18 +14,13 @@
 from vllm.sampling_params import GuidedDecodingParams, SamplingParams
 
 GUIDED_DECODING_BACKENDS_V1 = ["xgrammar"]
-
-
-@pytest.fixture
-def model_name():
-    return [
-        "Qwen/Qwen2.5-1.5B-Instruct", "mistralai/Ministral-8B-Instruct-2410"
-    ]
+MODELS = ["Qwen/Qwen2.5-1.5B-Instruct", "mistralai/Ministral-8B-Instruct-2410"]
 
 
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend",
                          GUIDED_DECODING_BACKENDS_V1)
+@pytest.mark.parametrize("model_name", MODELS)
 def test_guided_json_completion(
     monkeypatch: pytest.MonkeyPatch,
     sample_json_schema: dict[str, Any],
@@ -63,6 +58,7 @@ def test_guided_json_completion(
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend",
                          GUIDED_DECODING_BACKENDS_V1)
+@pytest.mark.parametrize("model_name", MODELS)
 def test_guided_json_object(
     monkeypatch: pytest.MonkeyPatch,
     guided_decoding_backend: str,
@@ -101,6 +97,7 @@ def test_guided_json_object(
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend",
                          GUIDED_DECODING_BACKENDS_V1)
+@pytest.mark.parametrize("model_name", MODELS)
 def test_guided_json_unsupported_schema(
     monkeypatch: pytest.MonkeyPatch,
     unsupported_json_schema: dict[str, Any],
@@ -128,6 +125,7 @@ def test_guided_json_unsupported_schema(
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend",
                          GUIDED_DECODING_BACKENDS_V1)
+@pytest.mark.parametrize("model_name", MODELS)
 def test_guided_grammar_ebnf(
     monkeypatch: pytest.MonkeyPatch,
     sample_sql_ebnf: str,
@@ -170,6 +168,7 @@ def test_guided_grammar_ebnf(
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend",
                          GUIDED_DECODING_BACKENDS_V1)
+@pytest.mark.parametrize("model_name", MODELS)
 def test_guided_grammar_lark(
     monkeypatch: pytest.MonkeyPatch,
     sample_sql_lark: str,
@@ -217,6 +216,7 @@ def test_guided_grammar_lark(
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend",
                          GUIDED_DECODING_BACKENDS_V1)
+@pytest.mark.parametrize("model_name", MODELS)
 def test_guided_grammar_ebnf_invalid(
     monkeypatch: pytest.MonkeyPatch,
     guided_decoding_backend: str,
@@ -244,6 +244,7 @@ def test_guided_grammar_ebnf_invalid(
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend",
                          GUIDED_DECODING_BACKENDS_V1)
+@pytest.mark.parametrize("model_name", MODELS)
 def test_guided_regex(
     monkeypatch: pytest.MonkeyPatch,
     sample_regex: str,
@@ -280,6 +281,7 @@ def test_guided_regex(
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("guided_decoding_backend",
                          GUIDED_DECODING_BACKENDS_V1)
+@pytest.mark.parametrize("model_name", MODELS)
 def test_guided_choice_completion(
     monkeypatch: pytest.MonkeyPatch,
     sample_guided_choice: str,