Commits (22)
- `958d0dc` Pass parallel_tool_calls directly and document intended usage in inte… (anastasds, Nov 19, 2025)
- `179d5f7` Merge branch 'main' into parallel-tool-calls-impl (anastasds, Nov 19, 2025)
- `0c2b82b` Merge branch 'main' into parallel-tool-calls-impl (franciscojavierarceo, Nov 21, 2025)
- `9cbb624` Merge branch 'main' into parallel-tool-calls-impl (anastasds, Dec 16, 2025)
- `1f6e095` Delete parallel tool calls section from documentation because it has … (anastasds, Nov 21, 2025)
- `a971d8f` Document behavior of parallel_tool_calls parameter (anastasds, Nov 21, 2025)
- `bbd59b7` Run pre-commit hooks (anastasds, Dec 16, 2025)
- `a6552c0` Vendor images for parallel_tool_calls docs (anastasds, Dec 16, 2025)
- `cf75274` --signoff (anastasds, Dec 16, 2025)
- `d531c57` Default parallel_tool_calls to none (anastasds, Dec 16, 2025)
- `5c48e0a` Run latest pre-commit hooks (anastasds, Dec 16, 2025)
- `7fda120` Merge branch 'main' into parallel-tool-calls-impl (anastasds, Dec 16, 2025)
- `d3acf47` Run latest pre-commit hooks (anastasds, Dec 16, 2025)
- `d14d930` Merge branch 'main' into parallel-tool-calls-impl (anastasds, Dec 16, 2025)
- `e2030f7` Default parallel_tool_calls to None everywhere (anastasds, Dec 16, 2025)
- `983fa48` Rerun pre commit hooks (anastasds, Dec 16, 2025)
- `5214ccf` Merge branch 'main' into parallel-tool-calls-impl (anastasds, Dec 16, 2025)
- `271af12` Merge branch 'pdocs' into parallel-tool-calls-impl (anastasds, Dec 16, 2025)
- `2610b4f` Return parallel_tool_calls default to true (anastasds, Dec 17, 2025)
- `8a924a3` Add missing replays for integration tests (anastasds, Dec 17, 2025)
- `d7f8d6b` Merge branch 'main' into parallel-tool-calls-impl (anastasds, Dec 17, 2025)
- `5514378` Merge branch 'main' into parallel-tool-calls-impl (anastasds, Dec 18, 2025)
3 changes: 0 additions & 3 deletions client-sdks/stainless/openapi.yml
@@ -6921,7 +6921,6 @@ components:
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -7363,7 +7362,6 @@ components:
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -7531,7 +7529,6 @@ components:
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
(Two files in this diff could not be displayed.)
24 changes: 16 additions & 8 deletions docs/docs/providers/openai_responses_limitations.mdx
@@ -262,14 +262,6 @@ OpenAI provides a [prompt caching](https://platform.openai.com/docs/guides/promp

---

### Parallel Tool Calls

**Status:** Rumored Issue

There are reports that `parallel_tool_calls` may not work correctly. This needs verification and a ticket should be opened if confirmed.

---

## Resolved Issues

The following limitations have been addressed in recent releases:
@@ -297,3 +289,19 @@ The `require_approval` parameter for MCP tools in the Responses API now works co
**Fixed in:** [#3003](https://github.com/llamastack/llama-stack/pull/3003) (Agent API), [#3602](https://github.com/llamastack/llama-stack/pull/3602) (Responses API)

MCP tools now correctly handle array-type arguments in both the Agent API and Responses API.

---

### Parallel tool calls

**Status:** ✅ Resolved

The [`parallel_tool_calls` parameter](https://platform.openai.com/docs/api-reference/responses/create#responses_create-parallel_tool_calls) controls turn-based function calling workflows, _not_ parallelism or concurrency. See the [related function calling documentation](https://platform.openai.com/docs/guides/function-calling#parallel-function-calling).

If `parallel_tool_calls=false`, the model generates at most one function call per turn; the client is responsible for executing each call and returning its result, in the expected format, before the conversation proceeds.

For example, given a request with a `get_weather` function definition and the input "What is the weather in Tokyo and New York?", the model will by default generate two function calls in a single turn - one `get_weather` call for each of `Tokyo` and `New York`. With `parallel_tool_calls=false`, however, only one of these is generated initially; the client must execute that call and append the result to the message history, after which the model generates the second function call.

| parallel_tool_calls=true | parallel_tool_calls=false |
|------|-------|
| <img width="1134" height="1330" alt="Image" src="img/parallel-tool-calls-true.png" /> | <img width="1236" height="1868" alt="Image" src="img/parallel-tool-calls-false.png" /> |
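
To make the turn-based loop concrete, here is a minimal client-side sketch assuming an OpenAI-compatible Python client; the base URL, API key, model id, and the stubbed `get_weather` result are all placeholders:

```python
from openai import OpenAI

# Placeholder base URL, API key, and model id; point these at your own deployment.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

tools = [
    {
        "type": "function",
        "name": "get_weather",
        "description": "Get the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    }
]

input_items = [{"role": "user", "content": "What is the weather in Tokyo and New York?"}]

while True:
    response = client.responses.create(
        model="llama-3.3-70b",  # placeholder model id
        input=input_items,
        tools=tools,
        parallel_tool_calls=False,  # at most one function call per turn
    )
    calls = [item for item in response.output if item.type == "function_call"]
    if not calls:
        break  # no more function calls; response.output holds the final answer

    # Append the model's output, execute each generated call, and return its
    # result so the model can proceed (e.g. generate the call for the second city).
    input_items += response.output
    for call in calls:
        # With parallel_tool_calls=False there is at most one call here.
        input_items.append(
            {
                "type": "function_call_output",
                "call_id": call.call_id,
                "output": '{"temperature_c": 22}',  # stubbed get_weather result
            }
        )

print(response.output_text)
```

With `parallel_tool_calls=True` (OpenAI's current default), the first `responses.create` call would instead return both `get_weather` calls in one turn, and the loop would execute and append an output item for each of them before continuing.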
3 changes: 0 additions & 3 deletions docs/static/deprecated-llama-stack-spec.yaml
@@ -3747,7 +3747,6 @@ components:
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -4189,7 +4188,6 @@ components:
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -4357,7 +4355,6 @@ components:
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
2 changes: 0 additions & 2 deletions docs/static/experimental-llama-stack-spec.yaml
@@ -3466,7 +3466,6 @@ components:
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -3904,7 +3903,6 @@ components:
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
3 changes: 0 additions & 3 deletions docs/static/llama-stack-spec.yaml
@@ -5563,7 +5563,6 @@ components:
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -6005,7 +6004,6 @@ components:
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -6173,7 +6171,6 @@ components:
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
3 changes: 0 additions & 3 deletions docs/static/stainless-llama-stack-spec.yaml
@@ -6921,7 +6921,6 @@ components:
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -7363,7 +7362,6 @@ components:
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -7531,7 +7529,6 @@ components:
anyOf:
- type: boolean
- type: 'null'
default: true
previous_response_id:
anyOf:
- type: string
@@ -99,7 +99,7 @@ async def create_openai_response(
model: str,
prompt: OpenAIResponsePrompt | None = None,
instructions: str | None = None,
parallel_tool_calls: bool | None = True,
parallel_tool_calls: bool | None = None,
previous_response_id: str | None = None,
conversation: str | None = None,
store: bool | None = True,
@@ -450,7 +450,7 @@ async def _create_streaming_response(
tool_choice: OpenAIResponseInputToolChoice | None = None,
max_infer_iters: int | None = 10,
guardrail_ids: list[str] | None = None,
parallel_tool_calls: bool | None = True,
parallel_tool_calls: bool | None = None,
max_tool_calls: int | None = None,
metadata: dict[str, str] | None = None,
include: list[ResponseItemInclude] | None = None,
@@ -315,6 +315,7 @@ async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
model=self.ctx.model,
messages=messages,
# Pydantic models are dict-compatible but mypy treats them as distinct types
parallel_tool_calls=self.parallel_tool_calls,
tools=effective_tools, # type: ignore[arg-type]
tool_choice=chat_tool_choice,
stream=True,
2 changes: 1 addition & 1 deletion src/llama_stack_api/agents.py
@@ -88,7 +88,7 @@ async def create_openai_response(
model: str,
prompt: OpenAIResponsePrompt | None = None,
instructions: str | None = None,
parallel_tool_calls: bool | None = True,
parallel_tool_calls: bool | None = None,
previous_response_id: str | None = None,
conversation: str | None = None,
store: bool | None = True,
2 changes: 1 addition & 1 deletion src/llama_stack_api/openai_responses.py
@@ -709,7 +709,7 @@ class OpenAIResponseObject(BaseModel):
model: str
object: Literal["response"] = "response"
output: Sequence[OpenAIResponseOutput]
parallel_tool_calls: bool | None = True
parallel_tool_calls: bool | None = None
**Collaborator:**
why the api change?

**Contributor Author:**
This parameter is optional, so when it is passed downstream it should not be set unless the user set it in their initial request. If a downstream provider defaults to `False`, that default should not be overridden.

OpenAI currently defaults to `true` internally, but that may change or the parameter may be removed, so it is best not to set it explicitly. (I originally defined this to be `True` by default and should not have.)
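
As an illustrative sketch (a hypothetical helper, not the actual Llama Stack code), the intended pass-through behavior looks like this:

```python
# Hypothetical sketch: forward parallel_tool_calls only when the caller set it,
# so a downstream provider's own default is never overridden.
def build_chat_params(
    model: str,
    messages: list[dict],
    parallel_tool_calls: bool | None = None,
) -> dict:
    params: dict = {"model": model, "messages": messages}
    if parallel_tool_calls is not None:
        # Set only when the user explicitly requested a value.
        params["parallel_tool_calls"] = parallel_tool_calls
    return params
```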

previous_response_id: str | None = None
prompt: OpenAIResponsePrompt | None = None
status: str