Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions vllm/entrypoints/openai/chat_completion/serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ async def chat_completion_stream_generator(
)

tool_parsers: list[ToolParser | None] = [
self.tool_parser(tokenizer)
self.tool_parser(tokenizer, request.tools)
] * num_choices
else:
tool_parsers = [None] * num_choices
Expand Down Expand Up @@ -1331,7 +1331,7 @@ async def chat_completion_full_generator(
"Tokenizer not available when `skip_tokenizer_init=True`"
)

tool_parser = self.tool_parser(tokenizer)
tool_parser = self.tool_parser(tokenizer, request.tools)
# NOTE: We use token_ids for openai tool parser
tool_call_info = tool_parser.extract_tool_calls(
"",
Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/engine/serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,7 @@ def _parse_tool_calls_from_content(

# Automatic Tool Call Parsing
try:
tool_parser = tool_parser_cls(tokenizer)
tool_parser = tool_parser_cls(tokenizer, request.tools)
except RuntimeError as e:
logger.exception("Error in tool parser creation.")
raise e
Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/parser/responses_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def __init__(
self.reasoning_parser_instance = reasoning_parser_cls(tokenizer)
self.tool_parser_instance = None
if tool_parser_cls is not None:
self.tool_parser_instance = tool_parser_cls(tokenizer)
self.tool_parser_instance = tool_parser_cls(tokenizer, request.tools)

# Store the last finish_reason to determine response status
self.finish_reason: str | None = None
Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/responses/serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -1348,7 +1348,7 @@ async def _process_simple_streaming_events(
reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
tool_parser = None
if self.parser and self.parser.tool_parser_cls:
tool_parser = self.parser.tool_parser_cls(tokenizer)
tool_parser = self.parser.tool_parser_cls(tokenizer, request.tools)
reasoning_ended = False
tool_call_text_started = False
previous_text = ""
Expand Down
4 changes: 3 additions & 1 deletion vllm/entrypoints/serve/render/serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,8 @@ async def preprocess_chat(
)
raise NotImplementedError(msg)
tokenizer = renderer.get_tokenizer()
request = tool_parser(tokenizer).adjust_request(request=request) # type: ignore[arg-type]
request = tool_parser(tokenizer, request.tools).adjust_request(
request=request # type: ignore[arg-type]
)

return conversation, [engine_prompt]
16 changes: 14 additions & 2 deletions vllm/tool_parsers/abstract_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,18 @@
import os
from collections.abc import Callable, Sequence
from functools import cached_property
from typing import TypeAlias

from openai.types.responses import (
ResponseFormatTextJSONSchemaConfig,
ResponseTextConfig,
)
from openai.types.responses.tool import Tool as ResponsesTool

from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage,
ExtractedToolCallInformation,
Expand All @@ -30,6 +35,8 @@

logger = init_logger(__name__)

Tool: TypeAlias = ChatCompletionToolsParam | ResponsesTool


class ToolParser:
"""
Expand All @@ -38,14 +45,19 @@ class ToolParser:
derived classes.
"""

def __init__(self, tokenizer: TokenizerLike):
def __init__(
self,
tokenizer: TokenizerLike,
tools: list[Tool] | None = None,
):
self.prev_tool_call_arr: list[dict] = []
# the index of the tool call that is currently being parsed
self.current_tool_id: int = -1
self.current_tool_name_sent: bool = False
self.streamed_args_for_tool: list[str] = []

self.model_tokenizer = tokenizer
self.tools = tools

@cached_property
def vocab(self) -> dict[str, int]:
Expand Down
6 changes: 3 additions & 3 deletions vllm/tool_parsers/deepseekv31_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@
)
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import ToolParser
from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser

logger = init_logger(__name__)


class DeepSeekV31ToolParser(ToolParser):
def __init__(self, tokenizer: TokenizerLike):
super().__init__(tokenizer)
def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
super().__init__(tokenizer, tools)

self.current_tool_name_sent: bool = False
self.prev_tool_call_arr: list[dict] = []
Expand Down
5 changes: 3 additions & 2 deletions vllm/tool_parsers/deepseekv32_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
Tool,
ToolParser,
)

Expand All @@ -43,8 +44,8 @@ class DeepSeekV32ToolParser(ToolParser):
</|DSML|function_calls>
"""

def __init__(self, tokenizer: TokenizerLike):
super().__init__(tokenizer)
def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
super().__init__(tokenizer, tools)

self.prev_tool_call_arr: list[dict] = []

Expand Down
5 changes: 3 additions & 2 deletions vllm/tool_parsers/deepseekv3_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,16 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
Tool,
ToolParser,
)

logger = init_logger(__name__)


class DeepSeekV3ToolParser(ToolParser):
def __init__(self, tokenizer: TokenizerLike):
super().__init__(tokenizer)
def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
super().__init__(tokenizer, tools)

self.current_tool_name_sent: bool = False
self.prev_tool_call_arr: list[dict] = []
Expand Down
5 changes: 3 additions & 2 deletions vllm/tool_parsers/ernie45_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,20 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
Tool,
ToolParser,
)

logger = init_logger(__name__)


class Ernie45ToolParser(ToolParser):
def __init__(self, tokenizer: TokenizerLike):
def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
"""
Ernie thinking model format:
abc\n</think>\n\n\n<tool_call>\ndef\n</tool_call>\n
"""
super().__init__(tokenizer)
super().__init__(tokenizer, tools)
self.current_tool_name_sent = False
self.prev_tool_call_arr: list[dict] = []
self.current_tool_id = -1
Expand Down
6 changes: 3 additions & 3 deletions vllm/tool_parsers/functiongemma_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
)
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import ToolParser
from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser

logger = init_logger(__name__)

Expand All @@ -33,8 +33,8 @@ class FunctionGemmaToolParser(ToolParser):
<start_function_call>call:func_name{param:<escape>value<escape>}<end_function_call>
"""

def __init__(self, tokenizer: TokenizerLike):
super().__init__(tokenizer)
def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
super().__init__(tokenizer, tools)

# Streaming state
self.current_tool_name_sent: bool = False
Expand Down
6 changes: 3 additions & 3 deletions vllm/tool_parsers/gigachat3_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
)
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import ToolParser
from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser

logger = init_logger(__name__)

Expand All @@ -46,8 +46,8 @@


class GigaChat3ToolParser(ToolParser):
def __init__(self, tokenizer: TokenizerLike):
super().__init__(tokenizer)
def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
super().__init__(tokenizer, tools)
self.tool_started: bool = False
self.tool_name_sent: bool = False
self.tool_id: str | None = None
Expand Down
5 changes: 3 additions & 2 deletions vllm/tool_parsers/glm47_moe_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,15 @@

from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import Tool
from vllm.tool_parsers.glm4_moe_tool_parser import Glm4MoeModelToolParser

logger = init_logger(__name__)


class Glm47MoeModelToolParser(Glm4MoeModelToolParser):
def __init__(self, tokenizer: TokenizerLike):
super().__init__(tokenizer)
def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
super().__init__(tokenizer, tools)
# GLM-4.7 format: <tool_call>func_name[<arg_key>...]*</tool_call>
# The function name can be followed by a newline, whitespace, or
# directly by <arg_key> tags (no separator). The arg section is
Expand Down
8 changes: 4 additions & 4 deletions vllm/tool_parsers/glm4_moe_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaFunctionCall,
Expand All @@ -34,6 +33,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
Tool,
ToolParser,
)

Expand All @@ -48,8 +48,8 @@ class Glm4MoeModelToolParser(ToolParser):
rather than waiting for the complete </arg_value> tag.
"""

def __init__(self, tokenizer: TokenizerLike):
super().__init__(tokenizer)
def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
super().__init__(tokenizer, tools)
# Stateful streaming fields
self.current_tool_name_sent: bool = False
self.prev_tool_call_arr: list[dict[str, Any]] = []
Expand Down Expand Up @@ -122,7 +122,7 @@ def _json_escape_string_content(s: str) -> str:
def _is_string_type(
tool_name: str,
arg_name: str,
tools: list[ChatCompletionToolsParam] | None,
tools: list[Tool] | None,
) -> bool:
if tools is None:
return False
Expand Down
5 changes: 3 additions & 2 deletions vllm/tool_parsers/granite4_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
Tool,
ToolParser,
)

Expand All @@ -43,8 +44,8 @@ def __init__(self, *, name: str, arguments: str | None): ...


class Granite4ToolParser(ToolParser):
def __init__(self, tokenizer: TokenizerLike):
super().__init__(tokenizer)
def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
super().__init__(tokenizer, tools)

self.prev_tool_call_arr: list[dict] = []
self.current_tool_id: int = -1
Expand Down
5 changes: 3 additions & 2 deletions vllm/tool_parsers/granite_20b_fc_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
Tool,
ToolParser,
)
from vllm.tool_parsers.utils import (
Expand All @@ -46,8 +47,8 @@ class Granite20bFCToolParser(ToolParser):
are all set
"""

def __init__(self, tokenizer: TokenizerLike):
super().__init__(tokenizer)
def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
super().__init__(tokenizer, tools)

self.bot_token = "<function_call>"
self.tool_start_token = self.bot_token
Expand Down
5 changes: 3 additions & 2 deletions vllm/tool_parsers/granite_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
Tool,
ToolParser,
)
from vllm.tool_parsers.utils import (
Expand All @@ -44,8 +45,8 @@ class GraniteToolParser(ToolParser):
are all set
"""

def __init__(self, tokenizer: TokenizerLike):
super().__init__(tokenizer)
def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
super().__init__(tokenizer, tools)
# for granite 3.0, the token `<|tool_call|>`
self.bot_token = "<|tool_call|>"
# for granite 3.1, the string `<tool_call>`
Expand Down
5 changes: 3 additions & 2 deletions vllm/tool_parsers/hermes_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
Tool,
ToolParser,
)
from vllm.utils.mistral import is_mistral_tokenizer
Expand All @@ -31,8 +32,8 @@


class Hermes2ProToolParser(ToolParser):
def __init__(self, tokenizer: TokenizerLike):
super().__init__(tokenizer)
def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
super().__init__(tokenizer, tools)

if is_mistral_tokenizer(tokenizer):
logger.error("Detected Mistral tokenizer when using a Hermes model")
Expand Down
5 changes: 3 additions & 2 deletions vllm/tool_parsers/hunyuan_a13b_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
Tool,
ToolParser,
)
from vllm.tool_parsers.utils import consume_space
Expand All @@ -31,8 +32,8 @@


class HunyuanA13BToolParser(ToolParser):
def __init__(self, tokenizer: TokenizerLike):
super().__init__(tokenizer)
def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
super().__init__(tokenizer, tools)

# Initialize state for streaming mode
self.prev_tool_calls: list[dict] = []
Expand Down
5 changes: 3 additions & 2 deletions vllm/tool_parsers/internlm2_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
Tool,
ToolParser,
)
from vllm.tool_parsers.utils import extract_intermediate_diff
Expand All @@ -30,8 +31,8 @@


class Internlm2ToolParser(ToolParser):
def __init__(self, tokenizer: TokenizerLike):
super().__init__(tokenizer)
def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
super().__init__(tokenizer, tools)
self.position = 0

def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
Expand Down
Loading
Loading