Skip to content

Commit 6b45699

Browse files
feat: Add prompt_cache_key parameter support
1 parent 7501365 commit 6b45699

9 files changed

Lines changed: 64 additions & 2 deletions

File tree

.stats.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
 configured_endpoints: 108
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-408a03048e7b2e79fd6495e59120ee5fc2ff71503be4a470529efaa88ca911e2.yml
-openapi_spec_hash: 24512bdd1c4bf5b8770f6b8ddf0620d0
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-d6858fead41d2db69218aca5b3b7bc8fe300a1025484c486c3cb304ed39c48bc.yml
+openapi_spec_hash: bb1cc7aff177fad17663182b20e964b6
 config_hash: 07e70c7f1980785685ea4f2618dfde62

src/llama_stack_client/resources/chat/completions.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ def create(
7070
n: Optional[int] | Omit = omit,
7171
parallel_tool_calls: Optional[bool] | Omit = omit,
7272
presence_penalty: Optional[float] | Omit = omit,
73+
prompt_cache_key: Optional[str] | Omit = omit,
7374
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] | Omit = omit,
7475
response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
7576
safety_identifier: Optional[str] | Omit = omit,
@@ -119,6 +120,8 @@ def create(
119120
120121
presence_penalty: The penalty for repeated tokens.
121122
123+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
124+
122125
reasoning_effort: The effort level for reasoning models.
123126
124127
response_format: The response format to use.
@@ -172,6 +175,7 @@ def create(
172175
n: Optional[int] | Omit = omit,
173176
parallel_tool_calls: Optional[bool] | Omit = omit,
174177
presence_penalty: Optional[float] | Omit = omit,
178+
prompt_cache_key: Optional[str] | Omit = omit,
175179
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] | Omit = omit,
176180
response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
177181
safety_identifier: Optional[str] | Omit = omit,
@@ -222,6 +226,8 @@ def create(
222226
223227
presence_penalty: The penalty for repeated tokens.
224228
229+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
230+
225231
reasoning_effort: The effort level for reasoning models.
226232
227233
response_format: The response format to use.
@@ -273,6 +279,7 @@ def create(
273279
n: Optional[int] | Omit = omit,
274280
parallel_tool_calls: Optional[bool] | Omit = omit,
275281
presence_penalty: Optional[float] | Omit = omit,
282+
prompt_cache_key: Optional[str] | Omit = omit,
276283
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] | Omit = omit,
277284
response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
278285
safety_identifier: Optional[str] | Omit = omit,
@@ -323,6 +330,8 @@ def create(
323330
324331
presence_penalty: The penalty for repeated tokens.
325332
333+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
334+
326335
reasoning_effort: The effort level for reasoning models.
327336
328337
response_format: The response format to use.
@@ -373,6 +382,7 @@ def create(
373382
n: Optional[int] | Omit = omit,
374383
parallel_tool_calls: Optional[bool] | Omit = omit,
375384
presence_penalty: Optional[float] | Omit = omit,
385+
prompt_cache_key: Optional[str] | Omit = omit,
376386
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] | Omit = omit,
377387
response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
378388
safety_identifier: Optional[str] | Omit = omit,
@@ -409,6 +419,7 @@ def create(
409419
"n": n,
410420
"parallel_tool_calls": parallel_tool_calls,
411421
"presence_penalty": presence_penalty,
422+
"prompt_cache_key": prompt_cache_key,
412423
"reasoning_effort": reasoning_effort,
413424
"response_format": response_format,
414425
"safety_identifier": safety_identifier,
@@ -561,6 +572,7 @@ async def create(
561572
n: Optional[int] | Omit = omit,
562573
parallel_tool_calls: Optional[bool] | Omit = omit,
563574
presence_penalty: Optional[float] | Omit = omit,
575+
prompt_cache_key: Optional[str] | Omit = omit,
564576
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] | Omit = omit,
565577
response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
566578
safety_identifier: Optional[str] | Omit = omit,
@@ -610,6 +622,8 @@ async def create(
610622
611623
presence_penalty: The penalty for repeated tokens.
612624
625+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
626+
613627
reasoning_effort: The effort level for reasoning models.
614628
615629
response_format: The response format to use.
@@ -663,6 +677,7 @@ async def create(
663677
n: Optional[int] | Omit = omit,
664678
parallel_tool_calls: Optional[bool] | Omit = omit,
665679
presence_penalty: Optional[float] | Omit = omit,
680+
prompt_cache_key: Optional[str] | Omit = omit,
666681
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] | Omit = omit,
667682
response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
668683
safety_identifier: Optional[str] | Omit = omit,
@@ -713,6 +728,8 @@ async def create(
713728
714729
presence_penalty: The penalty for repeated tokens.
715730
731+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
732+
716733
reasoning_effort: The effort level for reasoning models.
717734
718735
response_format: The response format to use.
@@ -764,6 +781,7 @@ async def create(
764781
n: Optional[int] | Omit = omit,
765782
parallel_tool_calls: Optional[bool] | Omit = omit,
766783
presence_penalty: Optional[float] | Omit = omit,
784+
prompt_cache_key: Optional[str] | Omit = omit,
767785
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] | Omit = omit,
768786
response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
769787
safety_identifier: Optional[str] | Omit = omit,
@@ -814,6 +832,8 @@ async def create(
814832
815833
presence_penalty: The penalty for repeated tokens.
816834
835+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
836+
817837
reasoning_effort: The effort level for reasoning models.
818838
819839
response_format: The response format to use.
@@ -864,6 +884,7 @@ async def create(
864884
n: Optional[int] | Omit = omit,
865885
parallel_tool_calls: Optional[bool] | Omit = omit,
866886
presence_penalty: Optional[float] | Omit = omit,
887+
prompt_cache_key: Optional[str] | Omit = omit,
867888
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] | Omit = omit,
868889
response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
869890
safety_identifier: Optional[str] | Omit = omit,
@@ -900,6 +921,7 @@ async def create(
900921
"n": n,
901922
"parallel_tool_calls": parallel_tool_calls,
902923
"presence_penalty": presence_penalty,
924+
"prompt_cache_key": prompt_cache_key,
903925
"reasoning_effort": reasoning_effort,
904926
"response_format": response_format,
905927
"safety_identifier": safety_identifier,

src/llama_stack_client/resources/responses/responses.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ def create(
102102
parallel_tool_calls: Optional[bool] | Omit = omit,
103103
previous_response_id: Optional[str] | Omit = omit,
104104
prompt: Optional[response_create_params.Prompt] | Omit = omit,
105+
prompt_cache_key: Optional[str] | Omit = omit,
105106
reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
106107
safety_identifier: Optional[str] | Omit = omit,
107108
store: Optional[bool] | Omit = omit,
@@ -148,6 +149,8 @@ def create(
148149
149150
prompt: OpenAI compatible Prompt object that is used in OpenAI responses.
150151
152+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
153+
151154
reasoning: Configuration for reasoning effort in OpenAI responses.
152155
153156
Controls how much reasoning the model performs before generating a response.
@@ -215,6 +218,7 @@ def create(
215218
parallel_tool_calls: Optional[bool] | Omit = omit,
216219
previous_response_id: Optional[str] | Omit = omit,
217220
prompt: Optional[response_create_params.Prompt] | Omit = omit,
221+
prompt_cache_key: Optional[str] | Omit = omit,
218222
reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
219223
safety_identifier: Optional[str] | Omit = omit,
220224
store: Optional[bool] | Omit = omit,
@@ -262,6 +266,8 @@ def create(
262266
263267
prompt: OpenAI compatible Prompt object that is used in OpenAI responses.
264268
269+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
270+
265271
reasoning: Configuration for reasoning effort in OpenAI responses.
266272
267273
Controls how much reasoning the model performs before generating a response.
@@ -327,6 +333,7 @@ def create(
327333
parallel_tool_calls: Optional[bool] | Omit = omit,
328334
previous_response_id: Optional[str] | Omit = omit,
329335
prompt: Optional[response_create_params.Prompt] | Omit = omit,
336+
prompt_cache_key: Optional[str] | Omit = omit,
330337
reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
331338
safety_identifier: Optional[str] | Omit = omit,
332339
store: Optional[bool] | Omit = omit,
@@ -374,6 +381,8 @@ def create(
374381
375382
prompt: OpenAI compatible Prompt object that is used in OpenAI responses.
376383
384+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
385+
377386
reasoning: Configuration for reasoning effort in OpenAI responses.
378387
379388
Controls how much reasoning the model performs before generating a response.
@@ -438,6 +447,7 @@ def create(
438447
parallel_tool_calls: Optional[bool] | Omit = omit,
439448
previous_response_id: Optional[str] | Omit = omit,
440449
prompt: Optional[response_create_params.Prompt] | Omit = omit,
450+
prompt_cache_key: Optional[str] | Omit = omit,
441451
reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
442452
safety_identifier: Optional[str] | Omit = omit,
443453
store: Optional[bool] | Omit = omit,
@@ -471,6 +481,7 @@ def create(
471481
"parallel_tool_calls": parallel_tool_calls,
472482
"previous_response_id": previous_response_id,
473483
"prompt": prompt,
484+
"prompt_cache_key": prompt_cache_key,
474485
"reasoning": reasoning,
475486
"safety_identifier": safety_identifier,
476487
"store": store,
@@ -678,6 +689,7 @@ async def create(
678689
parallel_tool_calls: Optional[bool] | Omit = omit,
679690
previous_response_id: Optional[str] | Omit = omit,
680691
prompt: Optional[response_create_params.Prompt] | Omit = omit,
692+
prompt_cache_key: Optional[str] | Omit = omit,
681693
reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
682694
safety_identifier: Optional[str] | Omit = omit,
683695
store: Optional[bool] | Omit = omit,
@@ -724,6 +736,8 @@ async def create(
724736
725737
prompt: OpenAI compatible Prompt object that is used in OpenAI responses.
726738
739+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
740+
727741
reasoning: Configuration for reasoning effort in OpenAI responses.
728742
729743
Controls how much reasoning the model performs before generating a response.
@@ -791,6 +805,7 @@ async def create(
791805
parallel_tool_calls: Optional[bool] | Omit = omit,
792806
previous_response_id: Optional[str] | Omit = omit,
793807
prompt: Optional[response_create_params.Prompt] | Omit = omit,
808+
prompt_cache_key: Optional[str] | Omit = omit,
794809
reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
795810
safety_identifier: Optional[str] | Omit = omit,
796811
store: Optional[bool] | Omit = omit,
@@ -838,6 +853,8 @@ async def create(
838853
839854
prompt: OpenAI compatible Prompt object that is used in OpenAI responses.
840855
856+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
857+
841858
reasoning: Configuration for reasoning effort in OpenAI responses.
842859
843860
Controls how much reasoning the model performs before generating a response.
@@ -903,6 +920,7 @@ async def create(
903920
parallel_tool_calls: Optional[bool] | Omit = omit,
904921
previous_response_id: Optional[str] | Omit = omit,
905922
prompt: Optional[response_create_params.Prompt] | Omit = omit,
923+
prompt_cache_key: Optional[str] | Omit = omit,
906924
reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
907925
safety_identifier: Optional[str] | Omit = omit,
908926
store: Optional[bool] | Omit = omit,
@@ -950,6 +968,8 @@ async def create(
950968
951969
prompt: OpenAI compatible Prompt object that is used in OpenAI responses.
952970
971+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
972+
953973
reasoning: Configuration for reasoning effort in OpenAI responses.
954974
955975
Controls how much reasoning the model performs before generating a response.
@@ -1014,6 +1034,7 @@ async def create(
10141034
parallel_tool_calls: Optional[bool] | Omit = omit,
10151035
previous_response_id: Optional[str] | Omit = omit,
10161036
prompt: Optional[response_create_params.Prompt] | Omit = omit,
1037+
prompt_cache_key: Optional[str] | Omit = omit,
10171038
reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
10181039
safety_identifier: Optional[str] | Omit = omit,
10191040
store: Optional[bool] | Omit = omit,
@@ -1047,6 +1068,7 @@ async def create(
10471068
"parallel_tool_calls": parallel_tool_calls,
10481069
"previous_response_id": previous_response_id,
10491070
"prompt": prompt,
1071+
"prompt_cache_key": prompt_cache_key,
10501072
"reasoning": reasoning,
10511073
"safety_identifier": safety_identifier,
10521074
"store": store,

src/llama_stack_client/types/chat/completion_create_params.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     presence_penalty: Optional[float]
     """The penalty for repeated tokens."""

+    prompt_cache_key: Optional[str]
+    """A key to use when reading from or writing to the prompt cache."""
+
     reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]]
     """The effort level for reasoning models."""

src/llama_stack_client/types/response_create_params.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,9 @@ class ResponseCreateParamsBase(TypedDict, total=False):
     prompt: Optional[Prompt]
     """OpenAI compatible Prompt object that is used in OpenAI responses."""

+    prompt_cache_key: Optional[str]
+    """A key to use when reading from or writing to the prompt cache."""
+
     reasoning: Optional[Reasoning]
     """Configuration for reasoning effort in OpenAI responses.

src/llama_stack_client/types/response_list_response.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1166,6 +1166,8 @@ class ResponseListResponse(BaseModel):
     prompt: Optional[Prompt] = None
     """OpenAI compatible Prompt object that is used in OpenAI responses."""

+    prompt_cache_key: Optional[str] = None
+
     reasoning: Optional[Reasoning] = None
     """Configuration for reasoning effort in OpenAI responses.

src/llama_stack_client/types/response_object.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -770,6 +770,8 @@ def output_text(self) -> str:
     prompt: Optional[Prompt] = None
     """OpenAI compatible Prompt object that is used in OpenAI responses."""

+    prompt_cache_key: Optional[str] = None
+
     reasoning: Optional[Reasoning] = None
     """Configuration for reasoning effort in OpenAI responses.

tests/api_resources/chat/test_completions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient
6161
n=1,
6262
parallel_tool_calls=True,
6363
presence_penalty=-2,
64+
prompt_cache_key="prompt_cache_key",
6465
reasoning_effort="none",
6566
response_format={"type": "text"},
6667
safety_identifier="safety_identifier",
@@ -149,6 +150,7 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient
149150
n=1,
150151
parallel_tool_calls=True,
151152
presence_penalty=-2,
153+
prompt_cache_key="prompt_cache_key",
152154
reasoning_effort="none",
153155
response_format={"type": "text"},
154156
safety_identifier="safety_identifier",
@@ -314,6 +316,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
314316
n=1,
315317
parallel_tool_calls=True,
316318
presence_penalty=-2,
319+
prompt_cache_key="prompt_cache_key",
317320
reasoning_effort="none",
318321
response_format={"type": "text"},
319322
safety_identifier="safety_identifier",
@@ -402,6 +405,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
402405
n=1,
403406
parallel_tool_calls=True,
404407
presence_penalty=-2,
408+
prompt_cache_key="prompt_cache_key",
405409
reasoning_effort="none",
406410
response_format={"type": "text"},
407411
safety_identifier="safety_identifier",

tests/api_resources/test_responses.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient
6161
},
6262
"version": "version",
6363
},
64+
prompt_cache_key="prompt_cache_key",
6465
reasoning={"effort": "none"},
6566
safety_identifier="safety_identifier",
6667
store=True,
@@ -147,6 +148,7 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient
147148
},
148149
"version": "version",
149150
},
151+
prompt_cache_key="prompt_cache_key",
150152
reasoning={"effort": "none"},
151153
safety_identifier="safety_identifier",
152154
store=True,
@@ -348,6 +350,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
348350
},
349351
"version": "version",
350352
},
353+
prompt_cache_key="prompt_cache_key",
351354
reasoning={"effort": "none"},
352355
safety_identifier="safety_identifier",
353356
store=True,
@@ -434,6 +437,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
434437
},
435438
"version": "version",
436439
},
440+
prompt_cache_key="prompt_cache_key",
437441
reasoning={"effort": "none"},
438442
safety_identifier="safety_identifier",
439443
store=True,

0 commit comments

Comments (0)