Skip to content

Commit 1323e8c

Browse files
authored
ci: Deprecate vllm "guided_decoding" with "structured_outputs" (#8508)
1 parent 967a175 commit 1323e8c

File tree

4 files changed

+6
-14
lines changed

4 files changed

+6
-14
lines changed

python/openai/README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -610,10 +610,9 @@ tool calling result: The weather in Dallas, Texas is 85 degrees fahrenheit. It i
610610

611611
#### Named Tool Calling
612612

613-
The OpenAI frontend supports named function calling, utilizing guided decoding in the vLLM and TensorRT-LLM backends. Users can specify one of the tools in `tool_choice` to force the model to select a specific tool for function calling.
613+
The OpenAI frontend supports named function calling, utilizing structured outputs in the vLLM backend and guided decoding in the TensorRT-LLM backend. Users can specify one of the tools in `tool_choice` to force the model to select a specific tool for function calling.
614614

615615
> [!NOTE]
616-
> The latest release of TensorRT-LLM (v0.18.0) does not yet support guided decoding. To enable this feature, use a build from the main branch of TensorRT-LLM.
617616
> For instructions on enabling guided decoding in the TensorRT-LLM backend, please refer to [this guide](https://github.com/triton-inference-server/tensorrtllm_backend/blob/main/docs/guided_decoding.md)
618617
619618
Example for making a named tool calling request:

python/openai/openai_frontend/engine/triton_engine.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -678,7 +678,8 @@ def _get_streaming_response_delta(
678678
# check to make sure we haven't "forgotten" to stream
679679
# any tokens that were generated but previously
680680
# matched by partial json parsing, such as '}'.
681-
# only happens if we are NOT using guided decoding
681+
# only happens if we are NOT using structured outputs
682+
# or guided decoding
682683
if (
683684
self._should_check_for_unstreamed_tool_arg_tokens(
684685
response_delta=response_delta,

python/openai/openai_frontend/engine/utils/triton.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,11 +113,11 @@ def _create_vllm_generate_request(
113113

114114
guided_json = _get_guided_json_from_tool(request)
115115
if guided_json is not None:
116-
from vllm.sampling_params import GuidedDecodingParams
116+
from vllm.sampling_params import StructuredOutputsParams
117117

118118
sampling_parameters_json = json.loads(sampling_parameters)
119-
sampling_parameters_json["guided_decoding"] = json.dumps(
120-
asdict(GuidedDecodingParams.from_optional(json=guided_json))
119+
sampling_parameters_json["structured_outputs"] = json.dumps(
120+
asdict(StructuredOutputsParams.from_optional(json=guided_json))
121121
)
122122
sampling_parameters = json.dumps(sampling_parameters_json)
123123

python/openai/tests/test_tool_calling.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -341,10 +341,6 @@ async def test_tool_call_with_reply_response(
341341
# validate if streaming and non-streaming generate the same content
342342
assert "".join(chunks) == choice.message.content
343343

344-
@pytest.mark.skipif(
345-
os.environ.get("IMAGE_KIND") == "TRTLLM",
346-
reason="latest release version of Tensorrt LLM 0.18 doesn't support guided decoding",
347-
)
348344
@pytest.mark.asyncio
349345
async def test_tool_call_with_named_tool_choice(
350346
self, client: openai.AsyncOpenAI, model: str
@@ -448,10 +444,6 @@ async def test_tool_call_with_named_tool_choice(
448444
assert choice.message.role == role_name
449445
assert choice.message.tool_calls[0].function.name == function_name
450446

451-
@pytest.mark.skipif(
452-
os.environ.get("IMAGE_KIND") == "TRTLLM",
453-
reason="latest release version of Tensorrt LLM 0.18 doesn't support guided decoding",
454-
)
455447
@pytest.mark.asyncio
456448
async def test_tool_call_with_required_tool_choice(
457449
self, client: openai.AsyncOpenAI, model: str

0 commit comments

Comments (0)