Skip to content

Commit 6b45699

Browse files
feat: Add prompt_cache_key parameter support
1 parent 7501365 commit 6b45699

9 files changed

Lines changed: 64 additions & 2 deletions

File tree

.stats.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
 configured_endpoints: 108
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-408a03048e7b2e79fd6495e59120ee5fc2ff71503be4a470529efaa88ca911e2.yml
-openapi_spec_hash: 24512bdd1c4bf5b8770f6b8ddf0620d0
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-d6858fead41d2db69218aca5b3b7bc8fe300a1025484c486c3cb304ed39c48bc.yml
+openapi_spec_hash: bb1cc7aff177fad17663182b20e964b6
 config_hash: 07e70c7f1980785685ea4f2618dfde62

src/llama_stack_client/resources/chat/completions.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ def create(
7070
n: Optional[int] | Omit = omit,
7171
parallel_tool_calls: Optional[bool] | Omit = omit,
7272
presence_penalty: Optional[float] | Omit = omit,
73+
prompt_cache_key: Optional[str] | Omit = omit,
7374
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] | Omit = omit,
7475
response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
7576
safety_identifier: Optional[str] | Omit = omit,
@@ -119,6 +120,8 @@ def create(
119120
120121
presence_penalty: The penalty for repeated tokens.
121122
123+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
124+
122125
reasoning_effort: The effort level for reasoning models.
123126
124127
response_format: The response format to use.
@@ -172,6 +175,7 @@ def create(
172175
n: Optional[int] | Omit = omit,
173176
parallel_tool_calls: Optional[bool] | Omit = omit,
174177
presence_penalty: Optional[float] | Omit = omit,
178+
prompt_cache_key: Optional[str] | Omit = omit,
175179
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] | Omit = omit,
176180
response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
177181
safety_identifier: Optional[str] | Omit = omit,
@@ -222,6 +226,8 @@ def create(
222226
223227
presence_penalty: The penalty for repeated tokens.
224228
229+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
230+
225231
reasoning_effort: The effort level for reasoning models.
226232
227233
response_format: The response format to use.
@@ -273,6 +279,7 @@ def create(
273279
n: Optional[int] | Omit = omit,
274280
parallel_tool_calls: Optional[bool] | Omit = omit,
275281
presence_penalty: Optional[float] | Omit = omit,
282+
prompt_cache_key: Optional[str] | Omit = omit,
276283
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] | Omit = omit,
277284
response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
278285
safety_identifier: Optional[str] | Omit = omit,
@@ -323,6 +330,8 @@ def create(
323330
324331
presence_penalty: The penalty for repeated tokens.
325332
333+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
334+
326335
reasoning_effort: The effort level for reasoning models.
327336
328337
response_format: The response format to use.
@@ -373,6 +382,7 @@ def create(
373382
n: Optional[int] | Omit = omit,
374383
parallel_tool_calls: Optional[bool] | Omit = omit,
375384
presence_penalty: Optional[float] | Omit = omit,
385+
prompt_cache_key: Optional[str] | Omit = omit,
376386
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] | Omit = omit,
377387
response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
378388
safety_identifier: Optional[str] | Omit = omit,
@@ -409,6 +419,7 @@ def create(
409419
"n": n,
410420
"parallel_tool_calls": parallel_tool_calls,
411421
"presence_penalty": presence_penalty,
422+
"prompt_cache_key": prompt_cache_key,
412423
"reasoning_effort": reasoning_effort,
413424
"response_format": response_format,
414425
"safety_identifier": safety_identifier,
@@ -561,6 +572,7 @@ async def create(
561572
n: Optional[int] | Omit = omit,
562573
parallel_tool_calls: Optional[bool] | Omit = omit,
563574
presence_penalty: Optional[float] | Omit = omit,
575+
prompt_cache_key: Optional[str] | Omit = omit,
564576
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] | Omit = omit,
565577
response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
566578
safety_identifier: Optional[str] | Omit = omit,
@@ -610,6 +622,8 @@ async def create(
610622
611623
presence_penalty: The penalty for repeated tokens.
612624
625+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
626+
613627
reasoning_effort: The effort level for reasoning models.
614628
615629
response_format: The response format to use.
@@ -663,6 +677,7 @@ async def create(
663677
n: Optional[int] | Omit = omit,
664678
parallel_tool_calls: Optional[bool] | Omit = omit,
665679
presence_penalty: Optional[float] | Omit = omit,
680+
prompt_cache_key: Optional[str] | Omit = omit,
666681
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] | Omit = omit,
667682
response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
668683
safety_identifier: Optional[str] | Omit = omit,
@@ -713,6 +728,8 @@ async def create(
713728
714729
presence_penalty: The penalty for repeated tokens.
715730
731+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
732+
716733
reasoning_effort: The effort level for reasoning models.
717734
718735
response_format: The response format to use.
@@ -764,6 +781,7 @@ async def create(
764781
n: Optional[int] | Omit = omit,
765782
parallel_tool_calls: Optional[bool] | Omit = omit,
766783
presence_penalty: Optional[float] | Omit = omit,
784+
prompt_cache_key: Optional[str] | Omit = omit,
767785
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] | Omit = omit,
768786
response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
769787
safety_identifier: Optional[str] | Omit = omit,
@@ -814,6 +832,8 @@ async def create(
814832
815833
presence_penalty: The penalty for repeated tokens.
816834
835+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
836+
817837
reasoning_effort: The effort level for reasoning models.
818838
819839
response_format: The response format to use.
@@ -864,6 +884,7 @@ async def create(
864884
n: Optional[int] | Omit = omit,
865885
parallel_tool_calls: Optional[bool] | Omit = omit,
866886
presence_penalty: Optional[float] | Omit = omit,
887+
prompt_cache_key: Optional[str] | Omit = omit,
867888
reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]] | Omit = omit,
868889
response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
869890
safety_identifier: Optional[str] | Omit = omit,
@@ -900,6 +921,7 @@ async def create(
900921
"n": n,
901922
"parallel_tool_calls": parallel_tool_calls,
902923
"presence_penalty": presence_penalty,
924+
"prompt_cache_key": prompt_cache_key,
903925
"reasoning_effort": reasoning_effort,
904926
"response_format": response_format,
905927
"safety_identifier": safety_identifier,

src/llama_stack_client/resources/responses/responses.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ def create(
102102
parallel_tool_calls: Optional[bool] | Omit = omit,
103103
previous_response_id: Optional[str] | Omit = omit,
104104
prompt: Optional[response_create_params.Prompt] | Omit = omit,
105+
prompt_cache_key: Optional[str] | Omit = omit,
105106
reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
106107
safety_identifier: Optional[str] | Omit = omit,
107108
store: Optional[bool] | Omit = omit,
@@ -148,6 +149,8 @@ def create(
148149
149150
prompt: OpenAI compatible Prompt object that is used in OpenAI responses.
150151
152+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
153+
151154
reasoning: Configuration for reasoning effort in OpenAI responses.
152155
153156
Controls how much reasoning the model performs before generating a response.
@@ -215,6 +218,7 @@ def create(
215218
parallel_tool_calls: Optional[bool] | Omit = omit,
216219
previous_response_id: Optional[str] | Omit = omit,
217220
prompt: Optional[response_create_params.Prompt] | Omit = omit,
221+
prompt_cache_key: Optional[str] | Omit = omit,
218222
reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
219223
safety_identifier: Optional[str] | Omit = omit,
220224
store: Optional[bool] | Omit = omit,
@@ -262,6 +266,8 @@ def create(
262266
263267
prompt: OpenAI compatible Prompt object that is used in OpenAI responses.
264268
269+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
270+
265271
reasoning: Configuration for reasoning effort in OpenAI responses.
266272
267273
Controls how much reasoning the model performs before generating a response.
@@ -327,6 +333,7 @@ def create(
327333
parallel_tool_calls: Optional[bool] | Omit = omit,
328334
previous_response_id: Optional[str] | Omit = omit,
329335
prompt: Optional[response_create_params.Prompt] | Omit = omit,
336+
prompt_cache_key: Optional[str] | Omit = omit,
330337
reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
331338
safety_identifier: Optional[str] | Omit = omit,
332339
store: Optional[bool] | Omit = omit,
@@ -374,6 +381,8 @@ def create(
374381
375382
prompt: OpenAI compatible Prompt object that is used in OpenAI responses.
376383
384+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
385+
377386
reasoning: Configuration for reasoning effort in OpenAI responses.
378387
379388
Controls how much reasoning the model performs before generating a response.
@@ -438,6 +447,7 @@ def create(
438447
parallel_tool_calls: Optional[bool] | Omit = omit,
439448
previous_response_id: Optional[str] | Omit = omit,
440449
prompt: Optional[response_create_params.Prompt] | Omit = omit,
450+
prompt_cache_key: Optional[str] | Omit = omit,
441451
reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
442452
safety_identifier: Optional[str] | Omit = omit,
443453
store: Optional[bool] | Omit = omit,
@@ -471,6 +481,7 @@ def create(
471481
"parallel_tool_calls": parallel_tool_calls,
472482
"previous_response_id": previous_response_id,
473483
"prompt": prompt,
484+
"prompt_cache_key": prompt_cache_key,
474485
"reasoning": reasoning,
475486
"safety_identifier": safety_identifier,
476487
"store": store,
@@ -678,6 +689,7 @@ async def create(
678689
parallel_tool_calls: Optional[bool] | Omit = omit,
679690
previous_response_id: Optional[str] | Omit = omit,
680691
prompt: Optional[response_create_params.Prompt] | Omit = omit,
692+
prompt_cache_key: Optional[str] | Omit = omit,
681693
reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
682694
safety_identifier: Optional[str] | Omit = omit,
683695
store: Optional[bool] | Omit = omit,
@@ -724,6 +736,8 @@ async def create(
724736
725737
prompt: OpenAI compatible Prompt object that is used in OpenAI responses.
726738
739+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
740+
727741
reasoning: Configuration for reasoning effort in OpenAI responses.
728742
729743
Controls how much reasoning the model performs before generating a response.
@@ -791,6 +805,7 @@ async def create(
791805
parallel_tool_calls: Optional[bool] | Omit = omit,
792806
previous_response_id: Optional[str] | Omit = omit,
793807
prompt: Optional[response_create_params.Prompt] | Omit = omit,
808+
prompt_cache_key: Optional[str] | Omit = omit,
794809
reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
795810
safety_identifier: Optional[str] | Omit = omit,
796811
store: Optional[bool] | Omit = omit,
@@ -838,6 +853,8 @@ async def create(
838853
839854
prompt: OpenAI compatible Prompt object that is used in OpenAI responses.
840855
856+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
857+
841858
reasoning: Configuration for reasoning effort in OpenAI responses.
842859
843860
Controls how much reasoning the model performs before generating a response.
@@ -903,6 +920,7 @@ async def create(
903920
parallel_tool_calls: Optional[bool] | Omit = omit,
904921
previous_response_id: Optional[str] | Omit = omit,
905922
prompt: Optional[response_create_params.Prompt] | Omit = omit,
923+
prompt_cache_key: Optional[str] | Omit = omit,
906924
reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
907925
safety_identifier: Optional[str] | Omit = omit,
908926
store: Optional[bool] | Omit = omit,
@@ -950,6 +968,8 @@ async def create(
950968
951969
prompt: OpenAI compatible Prompt object that is used in OpenAI responses.
952970
971+
prompt_cache_key: A key to use when reading from or writing to the prompt cache.
972+
953973
reasoning: Configuration for reasoning effort in OpenAI responses.
954974
955975
Controls how much reasoning the model performs before generating a response.
@@ -1014,6 +1034,7 @@ async def create(
10141034
parallel_tool_calls: Optional[bool] | Omit = omit,
10151035
previous_response_id: Optional[str] | Omit = omit,
10161036
prompt: Optional[response_create_params.Prompt] | Omit = omit,
1037+
prompt_cache_key: Optional[str] | Omit = omit,
10171038
reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
10181039
safety_identifier: Optional[str] | Omit = omit,
10191040
store: Optional[bool] | Omit = omit,
@@ -1047,6 +1068,7 @@ async def create(
10471068
"parallel_tool_calls": parallel_tool_calls,
10481069
"previous_response_id": previous_response_id,
10491070
"prompt": prompt,
1071+
"prompt_cache_key": prompt_cache_key,
10501072
"reasoning": reasoning,
10511073
"safety_identifier": safety_identifier,
10521074
"store": store,

src/llama_stack_client/types/chat/completion_create_params.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     presence_penalty: Optional[float]
     """The penalty for repeated tokens."""

+    prompt_cache_key: Optional[str]
+    """A key to use when reading from or writing to the prompt cache."""
+
     reasoning_effort: Optional[Literal["none", "minimal", "low", "medium", "high", "xhigh"]]
     """The effort level for reasoning models."""

src/llama_stack_client/types/response_create_params.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,9 @@ class ResponseCreateParamsBase(TypedDict, total=False):
     prompt: Optional[Prompt]
     """OpenAI compatible Prompt object that is used in OpenAI responses."""

+    prompt_cache_key: Optional[str]
+    """A key to use when reading from or writing to the prompt cache."""
+
     reasoning: Optional[Reasoning]
     """Configuration for reasoning effort in OpenAI responses.

src/llama_stack_client/types/response_list_response.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1166,6 +1166,8 @@ class ResponseListResponse(BaseModel):
     prompt: Optional[Prompt] = None
     """OpenAI compatible Prompt object that is used in OpenAI responses."""

+    prompt_cache_key: Optional[str] = None
+
     reasoning: Optional[Reasoning] = None
     """Configuration for reasoning effort in OpenAI responses.

src/llama_stack_client/types/response_object.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -770,6 +770,8 @@ def output_text(self) -> str:
     prompt: Optional[Prompt] = None
     """OpenAI compatible Prompt object that is used in OpenAI responses."""

+    prompt_cache_key: Optional[str] = None
+
     reasoning: Optional[Reasoning] = None
     """Configuration for reasoning effort in OpenAI responses.

tests/api_resources/chat/test_completions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient
6161
n=1,
6262
parallel_tool_calls=True,
6363
presence_penalty=-2,
64+
prompt_cache_key="prompt_cache_key",
6465
reasoning_effort="none",
6566
response_format={"type": "text"},
6667
safety_identifier="safety_identifier",
@@ -149,6 +150,7 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient
149150
n=1,
150151
parallel_tool_calls=True,
151152
presence_penalty=-2,
153+
prompt_cache_key="prompt_cache_key",
152154
reasoning_effort="none",
153155
response_format={"type": "text"},
154156
safety_identifier="safety_identifier",
@@ -314,6 +316,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
314316
n=1,
315317
parallel_tool_calls=True,
316318
presence_penalty=-2,
319+
prompt_cache_key="prompt_cache_key",
317320
reasoning_effort="none",
318321
response_format={"type": "text"},
319322
safety_identifier="safety_identifier",
@@ -402,6 +405,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
402405
n=1,
403406
parallel_tool_calls=True,
404407
presence_penalty=-2,
408+
prompt_cache_key="prompt_cache_key",
405409
reasoning_effort="none",
406410
response_format={"type": "text"},
407411
safety_identifier="safety_identifier",

tests/api_resources/test_responses.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient
6161
},
6262
"version": "version",
6363
},
64+
prompt_cache_key="prompt_cache_key",
6465
reasoning={"effort": "none"},
6566
safety_identifier="safety_identifier",
6667
store=True,
@@ -147,6 +148,7 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient
147148
},
148149
"version": "version",
149150
},
151+
prompt_cache_key="prompt_cache_key",
150152
reasoning={"effort": "none"},
151153
safety_identifier="safety_identifier",
152154
store=True,
@@ -348,6 +350,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
348350
},
349351
"version": "version",
350352
},
353+
prompt_cache_key="prompt_cache_key",
351354
reasoning={"effort": "none"},
352355
safety_identifier="safety_identifier",
353356
store=True,
@@ -434,6 +437,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
434437
},
435438
"version": "version",
436439
},
440+
prompt_cache_key="prompt_cache_key",
437441
reasoning={"effort": "none"},
438442
safety_identifier="safety_identifier",
439443
store=True,

0 commit comments

Comments (0)