Commit 3974d5d

fix: align chat completion usage schema with OpenAI spec
1 parent: 784dfa2 · commit: 3974d5d

13 files changed

Lines changed: 152 additions & 54 deletions

.stats.yml

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
 configured_endpoints: 108
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-5c711749dbcc9575d8997ac3e0b2a2e45e20ef8de212cdb0fcceb7009b34cc48.yml
-openapi_spec_hash: 8107eabfac6b422964ac2a6688844181
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-10d6272b97f89f39692d8a734fe1e42de97b2ccc1cd56bc16113d15dff59b8dc.yml
+openapi_spec_hash: 69cdb9b6b2edc70ac3c70761a352d992
 config_hash: 6aa61d4143c3e3df785972c0287d1370

src/llama_stack_client/resources/chat/completions.py

Lines changed: 22 additions & 0 deletions
@@ -75,6 +75,7 @@ def create(
         response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
         safety_identifier: Optional[str] | Omit = omit,
         seed: Optional[int] | Omit = omit,
+        service_tier: Optional[Literal["auto", "default", "flex", "priority"]] | Omit = omit,
         stop: Union[str, SequenceNotStr[str], None] | Omit = omit,
         stream: Optional[Literal[False]] | Omit = omit,
         stream_options: Optional[Dict[str, object]] | Omit = omit,
@@ -130,6 +131,8 @@ def create(
 
           seed: The seed to use.
 
+          service_tier: The service tier for the request.
+
           stop: The stop tokens to use.
 
           stream: Whether to stream the response.
@@ -180,6 +183,7 @@ def create(
         response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
         safety_identifier: Optional[str] | Omit = omit,
         seed: Optional[int] | Omit = omit,
+        service_tier: Optional[Literal["auto", "default", "flex", "priority"]] | Omit = omit,
         stop: Union[str, SequenceNotStr[str], None] | Omit = omit,
         stream_options: Optional[Dict[str, object]] | Omit = omit,
         temperature: Optional[float] | Omit = omit,
@@ -236,6 +240,8 @@ def create(
 
           seed: The seed to use.
 
+          service_tier: The service tier for the request.
+
           stop: The stop tokens to use.
 
           stream_options: The stream options to use.
@@ -284,6 +290,7 @@ def create(
         response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
         safety_identifier: Optional[str] | Omit = omit,
         seed: Optional[int] | Omit = omit,
+        service_tier: Optional[Literal["auto", "default", "flex", "priority"]] | Omit = omit,
         stop: Union[str, SequenceNotStr[str], None] | Omit = omit,
         stream_options: Optional[Dict[str, object]] | Omit = omit,
         temperature: Optional[float] | Omit = omit,
@@ -340,6 +347,8 @@ def create(
 
           seed: The seed to use.
 
+          service_tier: The service tier for the request.
+
           stop: The stop tokens to use.
 
           stream_options: The stream options to use.
@@ -387,6 +396,7 @@ def create(
         response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
         safety_identifier: Optional[str] | Omit = omit,
         seed: Optional[int] | Omit = omit,
+        service_tier: Optional[Literal["auto", "default", "flex", "priority"]] | Omit = omit,
         stop: Union[str, SequenceNotStr[str], None] | Omit = omit,
         stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
         stream_options: Optional[Dict[str, object]] | Omit = omit,
@@ -424,6 +434,7 @@ def create(
                     "response_format": response_format,
                     "safety_identifier": safety_identifier,
                     "seed": seed,
+                    "service_tier": service_tier,
                     "stop": stop,
                     "stream": stream,
                     "stream_options": stream_options,
@@ -577,6 +588,7 @@ async def create(
         response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
         safety_identifier: Optional[str] | Omit = omit,
         seed: Optional[int] | Omit = omit,
+        service_tier: Optional[Literal["auto", "default", "flex", "priority"]] | Omit = omit,
         stop: Union[str, SequenceNotStr[str], None] | Omit = omit,
         stream: Optional[Literal[False]] | Omit = omit,
         stream_options: Optional[Dict[str, object]] | Omit = omit,
@@ -632,6 +644,8 @@ async def create(
 
           seed: The seed to use.
 
+          service_tier: The service tier for the request.
+
           stop: The stop tokens to use.
 
           stream: Whether to stream the response.
@@ -682,6 +696,7 @@ async def create(
         response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
         safety_identifier: Optional[str] | Omit = omit,
         seed: Optional[int] | Omit = omit,
+        service_tier: Optional[Literal["auto", "default", "flex", "priority"]] | Omit = omit,
         stop: Union[str, SequenceNotStr[str], None] | Omit = omit,
         stream_options: Optional[Dict[str, object]] | Omit = omit,
         temperature: Optional[float] | Omit = omit,
@@ -738,6 +753,8 @@ async def create(
 
           seed: The seed to use.
 
+          service_tier: The service tier for the request.
+
           stop: The stop tokens to use.
 
           stream_options: The stream options to use.
@@ -786,6 +803,7 @@ async def create(
         response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
         safety_identifier: Optional[str] | Omit = omit,
         seed: Optional[int] | Omit = omit,
+        service_tier: Optional[Literal["auto", "default", "flex", "priority"]] | Omit = omit,
         stop: Union[str, SequenceNotStr[str], None] | Omit = omit,
         stream_options: Optional[Dict[str, object]] | Omit = omit,
         temperature: Optional[float] | Omit = omit,
@@ -842,6 +860,8 @@ async def create(
 
           seed: The seed to use.
 
+          service_tier: The service tier for the request.
+
           stop: The stop tokens to use.
 
           stream_options: The stream options to use.
@@ -889,6 +909,7 @@ async def create(
         response_format: Optional[completion_create_params.ResponseFormat] | Omit = omit,
         safety_identifier: Optional[str] | Omit = omit,
         seed: Optional[int] | Omit = omit,
+        service_tier: Optional[Literal["auto", "default", "flex", "priority"]] | Omit = omit,
         stop: Union[str, SequenceNotStr[str], None] | Omit = omit,
         stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
         stream_options: Optional[Dict[str, object]] | Omit = omit,
@@ -926,6 +947,7 @@ async def create(
                     "response_format": response_format,
                     "safety_identifier": safety_identifier,
                     "seed": seed,
+                    "service_tier": service_tier,
                     "stop": stop,
                     "stream": stream,
                     "stream_options": stream_options,

src/llama_stack_client/resources/responses/responses.py

Lines changed: 22 additions & 0 deletions
@@ -105,6 +105,7 @@ def create(
         prompt_cache_key: Optional[str] | Omit = omit,
         reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
         safety_identifier: Optional[str] | Omit = omit,
+        service_tier: Optional[Literal["auto", "default", "flex", "priority"]] | Omit = omit,
         store: Optional[bool] | Omit = omit,
         stream: Optional[Literal[False]] | Omit = omit,
         temperature: Optional[float] | Omit = omit,
@@ -157,6 +158,8 @@ def create(
 
           safety_identifier: A stable identifier used for safety monitoring and abuse detection.
 
+          service_tier: The service tier for the request.
+
           store: Whether to store the response in the database.
 
           stream: Whether to stream the response.
@@ -221,6 +224,7 @@ def create(
         prompt_cache_key: Optional[str] | Omit = omit,
         reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
         safety_identifier: Optional[str] | Omit = omit,
+        service_tier: Optional[Literal["auto", "default", "flex", "priority"]] | Omit = omit,
         store: Optional[bool] | Omit = omit,
         temperature: Optional[float] | Omit = omit,
         text: Optional[response_create_params.Text] | Omit = omit,
@@ -274,6 +278,8 @@ def create(
 
           safety_identifier: A stable identifier used for safety monitoring and abuse detection.
 
+          service_tier: The service tier for the request.
+
           store: Whether to store the response in the database.
 
           temperature: Sampling temperature.
@@ -336,6 +342,7 @@ def create(
         prompt_cache_key: Optional[str] | Omit = omit,
         reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
         safety_identifier: Optional[str] | Omit = omit,
+        service_tier: Optional[Literal["auto", "default", "flex", "priority"]] | Omit = omit,
         store: Optional[bool] | Omit = omit,
         temperature: Optional[float] | Omit = omit,
         text: Optional[response_create_params.Text] | Omit = omit,
@@ -389,6 +396,8 @@ def create(
 
           safety_identifier: A stable identifier used for safety monitoring and abuse detection.
 
+          service_tier: The service tier for the request.
+
           store: Whether to store the response in the database.
 
           temperature: Sampling temperature.
@@ -450,6 +459,7 @@ def create(
         prompt_cache_key: Optional[str] | Omit = omit,
         reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
         safety_identifier: Optional[str] | Omit = omit,
+        service_tier: Optional[Literal["auto", "default", "flex", "priority"]] | Omit = omit,
         store: Optional[bool] | Omit = omit,
         stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
         temperature: Optional[float] | Omit = omit,
@@ -484,6 +494,7 @@ def create(
                     "prompt_cache_key": prompt_cache_key,
                     "reasoning": reasoning,
                     "safety_identifier": safety_identifier,
+                    "service_tier": service_tier,
                     "store": store,
                     "stream": stream,
                     "temperature": temperature,
@@ -692,6 +703,7 @@ async def create(
         prompt_cache_key: Optional[str] | Omit = omit,
         reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
         safety_identifier: Optional[str] | Omit = omit,
+        service_tier: Optional[Literal["auto", "default", "flex", "priority"]] | Omit = omit,
         store: Optional[bool] | Omit = omit,
         stream: Optional[Literal[False]] | Omit = omit,
         temperature: Optional[float] | Omit = omit,
@@ -744,6 +756,8 @@ async def create(
 
           safety_identifier: A stable identifier used for safety monitoring and abuse detection.
 
+          service_tier: The service tier for the request.
+
           store: Whether to store the response in the database.
 
           stream: Whether to stream the response.
@@ -808,6 +822,7 @@ async def create(
         prompt_cache_key: Optional[str] | Omit = omit,
         reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
         safety_identifier: Optional[str] | Omit = omit,
+        service_tier: Optional[Literal["auto", "default", "flex", "priority"]] | Omit = omit,
         store: Optional[bool] | Omit = omit,
         temperature: Optional[float] | Omit = omit,
         text: Optional[response_create_params.Text] | Omit = omit,
@@ -861,6 +876,8 @@ async def create(
 
           safety_identifier: A stable identifier used for safety monitoring and abuse detection.
 
+          service_tier: The service tier for the request.
+
           store: Whether to store the response in the database.
 
           temperature: Sampling temperature.
@@ -923,6 +940,7 @@ async def create(
         prompt_cache_key: Optional[str] | Omit = omit,
         reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
         safety_identifier: Optional[str] | Omit = omit,
+        service_tier: Optional[Literal["auto", "default", "flex", "priority"]] | Omit = omit,
         store: Optional[bool] | Omit = omit,
         temperature: Optional[float] | Omit = omit,
         text: Optional[response_create_params.Text] | Omit = omit,
@@ -976,6 +994,8 @@ async def create(
 
           safety_identifier: A stable identifier used for safety monitoring and abuse detection.
 
+          service_tier: The service tier for the request.
+
           store: Whether to store the response in the database.
 
           temperature: Sampling temperature.
@@ -1037,6 +1057,7 @@ async def create(
         prompt_cache_key: Optional[str] | Omit = omit,
         reasoning: Optional[response_create_params.Reasoning] | Omit = omit,
         safety_identifier: Optional[str] | Omit = omit,
+        service_tier: Optional[Literal["auto", "default", "flex", "priority"]] | Omit = omit,
         store: Optional[bool] | Omit = omit,
         stream: Optional[Literal[False]] | Literal[True] | Omit = omit,
         temperature: Optional[float] | Omit = omit,
@@ -1071,6 +1092,7 @@ async def create(
                     "prompt_cache_key": prompt_cache_key,
                     "reasoning": reasoning,
                     "safety_identifier": safety_identifier,
+                    "service_tier": service_tier,
                     "store": store,
                     "stream": stream,
                     "temperature": temperature,

src/llama_stack_client/types/chat/completion_create_params.py

Lines changed: 3 additions & 0 deletions
@@ -95,6 +95,9 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     seed: Optional[int]
     """The seed to use."""
 
+    service_tier: Optional[Literal["auto", "default", "flex", "priority"]]
+    """The service tier for the request."""
+
     stop: Union[str, SequenceNotStr[str], None]
     """The stop tokens to use."""
 
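
Because CompletionCreateParamsBase is declared with total=False, the new key is optional at the type level and existing params dicts keep type-checking unchanged. A sketch of a dict that includes it; the model id is a placeholder:

from llama_stack_client.types.chat.completion_create_params import CompletionCreateParamsBase

# total=False: every key, including the new service_tier, may be omitted.
params: CompletionCreateParamsBase = {
    "model": "llama3.2:3b",  # hypothetical model id
    "messages": [{"role": "user", "content": "Hi"}],
    "service_tier": "priority",  # type checkers enforce the four literal values
}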

src/llama_stack_client/types/chat/completion_create_response.py

Lines changed: 16 additions & 13 deletions
@@ -188,36 +188,36 @@ class Choice(BaseModel):
 
 
 class UsageCompletionTokensDetails(BaseModel):
-    """Token details for output tokens in OpenAI chat completion usage."""
+    """Detailed breakdown of output token usage."""
 
     reasoning_tokens: Optional[int] = None
     """Number of tokens used for reasoning (o1/o3 models)."""
 
 
 class UsagePromptTokensDetails(BaseModel):
-    """Token details for prompt tokens in OpenAI chat completion usage."""
+    """Detailed breakdown of input token usage."""
 
     cached_tokens: Optional[int] = None
     """Number of tokens retrieved from cache."""
 
 
 class Usage(BaseModel):
-    """Usage information for OpenAI chat completion."""
+    """Token usage information for the completion."""
 
-    completion_tokens: int
+    completion_tokens: Optional[int] = None
     """Number of tokens in the completion."""
 
-    prompt_tokens: int
-    """Number of tokens in the prompt."""
-
-    total_tokens: int
-    """Total tokens used (prompt + completion)."""
-
     completion_tokens_details: Optional[UsageCompletionTokensDetails] = None
-    """Token details for output tokens in OpenAI chat completion usage."""
+    """Detailed breakdown of output token usage."""
+
+    prompt_tokens: Optional[int] = None
+    """Number of tokens in the prompt."""
 
     prompt_tokens_details: Optional[UsagePromptTokensDetails] = None
-    """Token details for prompt tokens in OpenAI chat completion usage."""
+    """Detailed breakdown of input token usage."""
+
+    total_tokens: Optional[int] = None
+    """Total tokens used (prompt + completion)."""
 
 
 class CompletionCreateResponse(BaseModel):
@@ -238,5 +238,8 @@ class CompletionCreateResponse(BaseModel):
     object: Optional[Literal["chat.completion"]] = None
     """The object type."""
 
+    service_tier: Optional[str] = None
+    """The service tier that was used for this response."""
+
     usage: Optional[Usage] = None
-    """Usage information for OpenAI chat completion."""
+    """Token usage information for the completion."""
