Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions chat.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,8 @@ type ChatCompletionRequest struct {
// Such as think mode for qwen3. "chat_template_kwargs": {"enable_thinking": false}
// https://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes
ChatTemplateKwargs map[string]any `json:"chat_template_kwargs,omitempty"`
// ServiceTier specifies the latency tier to use for processing the request.
ServiceTier ServiceTier `json:"service_tier,omitempty"`
}

type StreamOptions struct {
Expand Down Expand Up @@ -363,6 +365,15 @@ const (
FinishReasonNull FinishReason = "null"
)

// ServiceTier names the latency tier used for processing a chat completion
// request. It is carried in the "service_tier" JSON field.
type ServiceTier string

// Known ServiceTier values. The wire representation of each constant is the
// string literal it is declared with.
const (
	// ServiceTierAuto is the "auto" service tier.
	ServiceTierAuto ServiceTier = "auto"
	// ServiceTierDefault is the "default" service tier.
	ServiceTierDefault ServiceTier = "default"
	// ServiceTierFlex is the "flex" service tier.
	ServiceTierFlex ServiceTier = "flex"
	// ServiceTierPriority is the "priority" service tier.
	ServiceTierPriority ServiceTier = "priority"
)

func (r FinishReason) MarshalJSON() ([]byte, error) {
if r == FinishReasonNull || r == "" {
return []byte("null"), nil
Expand Down Expand Up @@ -395,6 +406,7 @@ type ChatCompletionResponse struct {
Usage Usage `json:"usage"`
SystemFingerprint string `json:"system_fingerprint"`
PromptFilterResults []PromptFilterResult `json:"prompt_filter_results,omitempty"`
ServiceTier ServiceTier `json:"service_tier,omitempty"`

httpHeader
}
Expand Down
Loading