Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion config/config.example.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "sk-ant-your-key",
"api_base": "https://api.anthropic.com/v1"
"api_base": "https://api.anthropic.com/v1",
"thinking_level": "high"
},
{
"model_name": "gemini",
Expand Down
8 changes: 8 additions & 0 deletions pkg/agent/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ type AgentInstance struct {
MaxIterations int
MaxTokens int
Temperature float64
ThinkingLevel ThinkingLevel
ContextWindow int
SummarizeMessageThreshold int
SummarizeTokenPercent int
Expand Down Expand Up @@ -103,6 +104,12 @@ func NewAgentInstance(
temperature = *defaults.Temperature
}

var thinkingLevelStr string
if mc, err := cfg.GetModelConfig(model); err == nil {
thinkingLevelStr = mc.ThinkingLevel
}
thinkingLevel := parseThinkingLevel(thinkingLevelStr)

summarizeMessageThreshold := defaults.SummarizeMessageThreshold
if summarizeMessageThreshold == 0 {
summarizeMessageThreshold = 20
Expand Down Expand Up @@ -169,6 +176,7 @@ func NewAgentInstance(
MaxIterations: maxIter,
MaxTokens: maxTokens,
Temperature: temperature,
ThinkingLevel: thinkingLevel,
ContextWindow: maxTokens,
SummarizeMessageThreshold: summarizeMessageThreshold,
SummarizeTokenPercent: summarizeTokenPercent,
Expand Down
34 changes: 18 additions & 16 deletions pkg/agent/loop.go
Original file line number Diff line number Diff line change
Expand Up @@ -771,23 +771,29 @@ func (al *AgentLoop) runLLMIteration(
var response *providers.LLMResponse
var err error

llmOpts := map[string]any{
"max_tokens": agent.MaxTokens,
"temperature": agent.Temperature,
"prompt_cache_key": agent.ID,
}
// parseThinkingLevel guarantees ThinkingOff for empty/unknown values,
// so checking != ThinkingOff is sufficient.
if agent.ThinkingLevel != ThinkingOff {
if tc, ok := agent.Provider.(providers.ThinkingCapable); ok && tc.SupportsThinking() {
llmOpts["thinking_level"] = string(agent.ThinkingLevel)
} else {
logger.WarnCF("agent", "thinking_level is set but current provider does not support it, ignoring",
map[string]any{"agent_id": agent.ID, "thinking_level": string(agent.ThinkingLevel)})
}
}

callLLM := func() (*providers.LLMResponse, error) {
if len(agent.Candidates) > 1 && al.fallback != nil {
fbResult, fbErr := al.fallback.Execute(
ctx,
agent.Candidates,
func(ctx context.Context, provider, model string) (*providers.LLMResponse, error) {
return agent.Provider.Chat(
ctx,
messages,
providerToolDefs,
model,
map[string]any{
"max_tokens": agent.MaxTokens,
"temperature": agent.Temperature,
"prompt_cache_key": agent.ID,
},
)
return agent.Provider.Chat(ctx, messages, providerToolDefs, model, llmOpts)
},
)
if fbErr != nil {
Expand All @@ -803,11 +809,7 @@ func (al *AgentLoop) runLLMIteration(
}
return fbResult.Response, nil
}
return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, map[string]any{
"max_tokens": agent.MaxTokens,
"temperature": agent.Temperature,
"prompt_cache_key": agent.ID,
})
return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, llmOpts)
}

// Retry loop for context/token errors
Expand Down
39 changes: 39 additions & 0 deletions pkg/agent/thinking.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package agent

import "strings"

// ThinkingLevel controls how the provider sends thinking parameters.
//
// - "adaptive": sends {thinking: {type: "adaptive"}} + output_config.effort (Claude 4.6+)
// - "low"/"medium"/"high"/"xhigh": sends {thinking: {type: "enabled", budget_tokens: N}} (all models)
// - "off": disables thinking
type ThinkingLevel string

const (
	ThinkingOff      ThinkingLevel = "off"
	ThinkingLow      ThinkingLevel = "low"
	ThinkingMedium   ThinkingLevel = "medium"
	ThinkingHigh     ThinkingLevel = "high"
	ThinkingXHigh    ThinkingLevel = "xhigh"
	ThinkingAdaptive ThinkingLevel = "adaptive"
)

// parseThinkingLevel normalizes a config string to a ThinkingLevel.
// Matching is case-insensitive and ignores surrounding whitespace so that
// user-facing config values are accepted leniently.
// Unknown or empty values map to ThinkingOff.
func parseThinkingLevel(level string) ThinkingLevel {
	normalized := ThinkingLevel(strings.ToLower(strings.TrimSpace(level)))
	switch normalized {
	case ThinkingLow, ThinkingMedium, ThinkingHigh, ThinkingXHigh, ThinkingAdaptive:
		return normalized
	default:
		// Covers "off" itself plus anything unrecognized.
		return ThinkingOff
	}
}
35 changes: 35 additions & 0 deletions pkg/agent/thinking_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package agent

import "testing"

// TestParseThinkingLevel verifies that every recognized level parses to its
// constant, that unknown/empty inputs fall back to ThinkingOff, and that
// parsing tolerates mixed case and surrounding whitespace.
func TestParseThinkingLevel(t *testing.T) {
	cases := map[string]struct {
		input string
		want  ThinkingLevel
	}{
		"off":      {"off", ThinkingOff},
		"empty":    {"", ThinkingOff},
		"low":      {"low", ThinkingLow},
		"medium":   {"medium", ThinkingMedium},
		"high":     {"high", ThinkingHigh},
		"xhigh":    {"xhigh", ThinkingXHigh},
		"adaptive": {"adaptive", ThinkingAdaptive},
		"unknown":  {"unknown", ThinkingOff},
		// Case-insensitive and whitespace-tolerant.
		"upper_Medium":   {"Medium", ThinkingMedium},
		"upper_HIGH":     {"HIGH", ThinkingHigh},
		"mixed_Adaptive": {"Adaptive", ThinkingAdaptive},
		"leading_space":  {" high", ThinkingHigh},
		"trailing_space": {"low ", ThinkingLow},
		"both_spaces":    {" medium ", ThinkingMedium},
	}

	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			if got := parseThinkingLevel(tc.input); got != tc.want {
				t.Errorf("parseThinkingLevel(%q) = %q, want %q", tc.input, got, tc.want)
			}
		})
	}
}
1 change: 1 addition & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,7 @@ type ModelConfig struct {
RPM int `json:"rpm,omitempty"` // Requests per minute limit
MaxTokensField string `json:"max_tokens_field,omitempty"` // Field name for max tokens (e.g., "max_completion_tokens")
RequestTimeout int `json:"request_timeout,omitempty"`
ThinkingLevel string `json:"thinking_level,omitempty"` // Extended thinking: off|low|medium|high|xhigh|adaptive
}

// Validate checks if the ModelConfig has all required fields.
Expand Down
79 changes: 79 additions & 0 deletions pkg/providers/anthropic/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ type Provider struct {
baseURL string
}

// SupportsThinking implements providers.ThinkingCapable. This provider
// unconditionally reports thinking support; the per-level parameter
// translation happens later when the request is built.
func (p *Provider) SupportsThinking() bool { return true }

func NewProvider(token string) *Provider {
return NewProviderWithBaseURL(token, "")
}
Expand Down Expand Up @@ -182,9 +185,80 @@ func buildParams(
params.Tools = translateTools(tools)
}

// Extended Thinking / Adaptive Thinking
// The thinking_level value directly determines the API parameter format:
// "adaptive" β†’ {thinking: {type: "adaptive"}} + output_config.effort
// "low/medium/high/xhigh" β†’ {thinking: {type: "enabled", budget_tokens: N}}
if level, ok := options["thinking_level"].(string); ok && level != "" && level != "off" {
applyThinkingConfig(&params, level)
}

return params, nil
}

// applyThinkingConfig sets thinking parameters based on the level value.
// "adaptive" uses the adaptive thinking API (Claude 4.6+).
// All other levels use budget_tokens which is universally supported.
//
// Anthropic API constraint: temperature must not be set when thinking is enabled.
// budget_tokens must be strictly less than max_tokens.
// applyThinkingConfig sets thinking parameters on params based on the level value.
// "adaptive" uses the adaptive thinking API (Claude 4.6+); all other levels use
// budget_tokens, which is universally supported.
//
// Anthropic API constraints handled here:
//   - temperature must not be set alongside thinking, so it is cleared only
//     when thinking is actually enabled (the original cleared it even when the
//     level produced no thinking config, silently losing the user's setting);
//   - budget_tokens must be strictly less than max_tokens AND at least 1024.
//     After clamping to max_tokens-1 the budget can fall below that floor (or
//     to <= 0 for a tiny max_tokens); sending such a request would be rejected,
//     so thinking is disabled instead.
func applyThinkingConfig(params *anthropic.MessageNewParams, level string) {
	if level == "adaptive" {
		clearTemperatureForThinking(params, level)
		adaptive := anthropic.NewThinkingConfigAdaptiveParam()
		params.Thinking = anthropic.ThinkingConfigParamUnion{OfAdaptive: &adaptive}
		params.OutputConfig = anthropic.OutputConfigParam{
			Effort: anthropic.OutputConfigEffortHigh,
		}
		return
	}

	budget := int64(levelToBudget(level))
	if budget <= 0 {
		// Unknown level: leave the request untouched (no thinking, temperature kept).
		return
	}

	// budget_tokens must be < max_tokens; clamp to respect user's max_tokens setting.
	if budget >= params.MaxTokens {
		log.Printf("anthropic: budget_tokens (%d) clamped to %d (max_tokens-1)", budget, params.MaxTokens-1)
		budget = params.MaxTokens - 1
	} else if budget > params.MaxTokens*80/100 {
		log.Printf("anthropic: thinking budget (%d) exceeds 80%% of max_tokens (%d), output may be truncated",
			budget, params.MaxTokens)
	}

	// The API rejects budget_tokens below 1024; bail out rather than send a
	// request that is guaranteed to fail.
	const minThinkingBudget = 1024
	if budget < minThinkingBudget {
		log.Printf("anthropic: thinking disabled: max_tokens (%d) leaves no room for the minimum thinking budget (%d)",
			params.MaxTokens, minThinkingBudget)
		return
	}

	clearTemperatureForThinking(params, level)
	params.Thinking = anthropic.ThinkingConfigParamOfEnabled(budget)
}

// clearTemperatureForThinking resets temperature to its zero value (omitted
// from JSON serialization) because the Anthropic API rejects requests that set
// temperature alongside thinking.
func clearTemperatureForThinking(params *anthropic.MessageNewParams, level string) {
	if params.Temperature.Valid() {
		log.Printf("anthropic: temperature cleared because thinking is enabled (level=%s)", level)
	}
	params.Temperature = anthropic.MessageNewParams{}.Temperature
}

// thinkingBudgets maps a thinking level to its budget_tokens value.
// Values are based on Anthropic's recommendations and community best practices:
//
//	low    =  4,096 — simple reasoning, quick debugging (Claude Code "think")
//	medium = 16,384 — Anthropic recommended sweet spot for most tasks
//	high   = 32,000 — complex architecture, deep analysis (diminishing returns above this)
//	xhigh  = 64,000 — extreme reasoning, research problems, benchmarks
var thinkingBudgets = map[string]int{
	"low":    4096,
	"medium": 16384,
	"high":   32000,
	"xhigh":  64000,
}

// levelToBudget returns the budget_tokens for a thinking level, or 0 for any
// level without a fixed budget (including "off", "adaptive", and unknowns).
//
// Note: For Claude 4.6+, prefer adaptive thinking over manual budget_tokens.
func levelToBudget(level string) int {
	return thinkingBudgets[level]
}

func translateTools(tools []ToolDefinition) []anthropic.ToolUnionParam {
result := make([]anthropic.ToolUnionParam, 0, len(tools))
for _, t := range tools {
Expand Down Expand Up @@ -213,10 +287,14 @@ func translateTools(tools []ToolDefinition) []anthropic.ToolUnionParam {

func parseResponse(resp *anthropic.Message) *LLMResponse {
var content strings.Builder
var reasoning strings.Builder
var toolCalls []ToolCall

for _, block := range resp.Content {
switch block.Type {
case "thinking":
tb := block.AsThinking()
reasoning.WriteString(tb.Thinking)
case "text":
tb := block.AsText()
content.WriteString(tb.Text)
Expand Down Expand Up @@ -247,6 +325,7 @@ func parseResponse(resp *anthropic.Message) *LLMResponse {

return &LLMResponse{
Content: content.String(),
Reasoning: reasoning.String(),
ToolCalls: toolCalls,
FinishReason: finishReason,
Usage: &UsageInfo{
Expand Down
Loading