diff --git a/config/config.example.json b/config/config.example.json index adae6f05c8..12f5150038 100644 --- a/config/config.example.json +++ b/config/config.example.json @@ -20,7 +20,8 @@ "model_name": "claude-sonnet-4.6", "model": "anthropic/claude-sonnet-4.6", "api_key": "sk-ant-your-key", - "api_base": "https://api.anthropic.com/v1" + "api_base": "https://api.anthropic.com/v1", + "thinking_level": "high" }, { "model_name": "gemini", diff --git a/pkg/agent/instance.go b/pkg/agent/instance.go index ed25f537fa..1e18b6f644 100644 --- a/pkg/agent/instance.go +++ b/pkg/agent/instance.go @@ -26,6 +26,7 @@ type AgentInstance struct { MaxIterations int MaxTokens int Temperature float64 + ThinkingLevel ThinkingLevel ContextWindow int SummarizeMessageThreshold int SummarizeTokenPercent int @@ -103,6 +104,12 @@ func NewAgentInstance( temperature = *defaults.Temperature } + var thinkingLevelStr string + if mc, err := cfg.GetModelConfig(model); err == nil { + thinkingLevelStr = mc.ThinkingLevel + } + thinkingLevel := parseThinkingLevel(thinkingLevelStr) + summarizeMessageThreshold := defaults.SummarizeMessageThreshold if summarizeMessageThreshold == 0 { summarizeMessageThreshold = 20 @@ -169,6 +176,7 @@ func NewAgentInstance( MaxIterations: maxIter, MaxTokens: maxTokens, Temperature: temperature, + ThinkingLevel: thinkingLevel, ContextWindow: maxTokens, SummarizeMessageThreshold: summarizeMessageThreshold, SummarizeTokenPercent: summarizeTokenPercent, diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index db9efa2cf8..f2d7afa285 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -771,23 +771,29 @@ func (al *AgentLoop) runLLMIteration( var response *providers.LLMResponse var err error + llmOpts := map[string]any{ + "max_tokens": agent.MaxTokens, + "temperature": agent.Temperature, + "prompt_cache_key": agent.ID, + } + // parseThinkingLevel guarantees ThinkingOff for empty/unknown values, + // so checking != ThinkingOff is sufficient. 
+ if agent.ThinkingLevel != ThinkingOff { + if tc, ok := agent.Provider.(providers.ThinkingCapable); ok && tc.SupportsThinking() { + llmOpts["thinking_level"] = string(agent.ThinkingLevel) + } else { + logger.WarnCF("agent", "thinking_level is set but current provider does not support it, ignoring", + map[string]any{"agent_id": agent.ID, "thinking_level": string(agent.ThinkingLevel)}) + } + } + callLLM := func() (*providers.LLMResponse, error) { if len(agent.Candidates) > 1 && al.fallback != nil { fbResult, fbErr := al.fallback.Execute( ctx, agent.Candidates, func(ctx context.Context, provider, model string) (*providers.LLMResponse, error) { - return agent.Provider.Chat( - ctx, - messages, - providerToolDefs, - model, - map[string]any{ - "max_tokens": agent.MaxTokens, - "temperature": agent.Temperature, - "prompt_cache_key": agent.ID, - }, - ) + return agent.Provider.Chat(ctx, messages, providerToolDefs, model, llmOpts) }, ) if fbErr != nil { @@ -803,11 +809,7 @@ func (al *AgentLoop) runLLMIteration( } return fbResult.Response, nil } - return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, map[string]any{ - "max_tokens": agent.MaxTokens, - "temperature": agent.Temperature, - "prompt_cache_key": agent.ID, - }) + return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, llmOpts) } // Retry loop for context/token errors diff --git a/pkg/agent/thinking.go b/pkg/agent/thinking.go new file mode 100644 index 0000000000..015b692822 --- /dev/null +++ b/pkg/agent/thinking.go @@ -0,0 +1,39 @@ +package agent + +import "strings" + +// ThinkingLevel controls how the provider sends thinking parameters. 
+// +// - "adaptive": sends {thinking: {type: "adaptive"}} + output_config.effort (Claude 4.6+) +// - "low"/"medium"/"high"/"xhigh": sends {thinking: {type: "enabled", budget_tokens: N}} (all models) +// - "off": disables thinking +type ThinkingLevel string + +const ( + ThinkingOff ThinkingLevel = "off" + ThinkingLow ThinkingLevel = "low" + ThinkingMedium ThinkingLevel = "medium" + ThinkingHigh ThinkingLevel = "high" + ThinkingXHigh ThinkingLevel = "xhigh" + ThinkingAdaptive ThinkingLevel = "adaptive" +) + +// parseThinkingLevel normalizes a config string to a ThinkingLevel. +// Case-insensitive and whitespace-tolerant for user-facing config values. +// Returns ThinkingOff for unknown or empty values. +func parseThinkingLevel(level string) ThinkingLevel { + switch strings.ToLower(strings.TrimSpace(level)) { + case "adaptive": + return ThinkingAdaptive + case "low": + return ThinkingLow + case "medium": + return ThinkingMedium + case "high": + return ThinkingHigh + case "xhigh": + return ThinkingXHigh + default: + return ThinkingOff + } +} diff --git a/pkg/agent/thinking_test.go b/pkg/agent/thinking_test.go new file mode 100644 index 0000000000..be3a68c335 --- /dev/null +++ b/pkg/agent/thinking_test.go @@ -0,0 +1,35 @@ +package agent + +import "testing" + +func TestParseThinkingLevel(t *testing.T) { + tests := []struct { + name string + input string + want ThinkingLevel + }{ + {"off", "off", ThinkingOff}, + {"empty", "", ThinkingOff}, + {"low", "low", ThinkingLow}, + {"medium", "medium", ThinkingMedium}, + {"high", "high", ThinkingHigh}, + {"xhigh", "xhigh", ThinkingXHigh}, + {"adaptive", "adaptive", ThinkingAdaptive}, + {"unknown", "unknown", ThinkingOff}, + // Case-insensitive and whitespace-tolerant + {"upper_Medium", "Medium", ThinkingMedium}, + {"upper_HIGH", "HIGH", ThinkingHigh}, + {"mixed_Adaptive", "Adaptive", ThinkingAdaptive}, + {"leading_space", " high", ThinkingHigh}, + {"trailing_space", "low ", ThinkingLow}, + {"both_spaces", " medium ", 
ThinkingMedium}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := parseThinkingLevel(tt.input); got != tt.want { + t.Errorf("parseThinkingLevel(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} diff --git a/pkg/config/config.go b/pkg/config/config.go index f40e05e1c2..f85708ef09 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -505,6 +505,7 @@ type ModelConfig struct { RPM int `json:"rpm,omitempty"` // Requests per minute limit MaxTokensField string `json:"max_tokens_field,omitempty"` // Field name for max tokens (e.g., "max_completion_tokens") RequestTimeout int `json:"request_timeout,omitempty"` + ThinkingLevel string `json:"thinking_level,omitempty"` // Extended thinking: off|low|medium|high|xhigh|adaptive } // Validate checks if the ModelConfig has all required fields. diff --git a/pkg/providers/anthropic/provider.go b/pkg/providers/anthropic/provider.go index 1bb15f771f..1b250b9b47 100644 --- a/pkg/providers/anthropic/provider.go +++ b/pkg/providers/anthropic/provider.go @@ -31,6 +31,9 @@ type Provider struct { baseURL string } +// SupportsThinking implements providers.ThinkingCapable. +func (p *Provider) SupportsThinking() bool { return true } + func NewProvider(token string) *Provider { return NewProviderWithBaseURL(token, "") } @@ -182,9 +185,80 @@ func buildParams( params.Tools = translateTools(tools) } + // Extended Thinking / Adaptive Thinking + // The thinking_level value directly determines the API parameter format: + // "adaptive" → {thinking: {type: "adaptive"}} + output_config.effort + // "low/medium/high/xhigh" → {thinking: {type: "enabled", budget_tokens: N}} + if level, ok := options["thinking_level"].(string); ok && level != "" && level != "off" { + applyThinkingConfig(&params, level) + } + return params, nil } +// applyThinkingConfig sets thinking parameters based on the level value. +// "adaptive" uses the adaptive thinking API (Claude 4.6+). 
+// All other levels use budget_tokens which is universally supported. +// +// Anthropic API constraint: temperature must not be set when thinking is enabled. +// budget_tokens must be strictly less than max_tokens. +func applyThinkingConfig(params *anthropic.MessageNewParams, level string) { + // Anthropic API rejects requests with temperature set alongside thinking. + // Reset to zero value (omitted from JSON serialization). + if params.Temperature.Valid() { + log.Printf("anthropic: temperature cleared because thinking is enabled (level=%s)", level) + } + params.Temperature = anthropic.MessageNewParams{}.Temperature + + if level == "adaptive" { + adaptive := anthropic.NewThinkingConfigAdaptiveParam() + params.Thinking = anthropic.ThinkingConfigParamUnion{OfAdaptive: &adaptive} + params.OutputConfig = anthropic.OutputConfigParam{ + Effort: anthropic.OutputConfigEffortHigh, + } + return + } + + budget := int64(levelToBudget(level)) + if budget <= 0 { + return + } + + // budget_tokens must be < max_tokens; clamp to respect user's max_tokens setting. + if budget >= params.MaxTokens { + log.Printf("anthropic: budget_tokens (%d) clamped to %d (max_tokens-1)", budget, params.MaxTokens-1) + budget = params.MaxTokens - 1 + } else if budget > params.MaxTokens*80/100 { + log.Printf("anthropic: thinking budget (%d) exceeds 80%% of max_tokens (%d), output may be truncated", + budget, params.MaxTokens) + } + params.Thinking = anthropic.ThinkingConfigParamOfEnabled(budget) +} + +// levelToBudget maps a thinking level to budget_tokens. 
+// Values are based on Anthropic's recommendations and community best practices: +// +// low = 4,096 — simple reasoning, quick debugging (Claude Code "think") +// medium = 16,384 — Anthropic recommended sweet spot for most tasks +// high = 32,000 — complex architecture, deep analysis (diminishing returns above this) +// xhigh = 64,000 — extreme reasoning, research problems, benchmarks +// +// Note: For Claude 4.6+, prefer adaptive thinking over manual budget_tokens. +func levelToBudget(level string) int { + switch level { + case "low": + return 4096 + case "medium": + return 16384 + case "high": + return 32000 + case "xhigh": + return 64000 + default: + return 0 + } +} + func translateTools(tools []ToolDefinition) []anthropic.ToolUnionParam { result := make([]anthropic.ToolUnionParam, 0, len(tools)) for _, t := range tools { @@ -213,10 +287,14 @@ func translateTools(tools []ToolDefinition) []anthropic.ToolUnionParam { func parseResponse(resp *anthropic.Message) *LLMResponse { var content strings.Builder + var reasoning strings.Builder var toolCalls []ToolCall for _, block := range resp.Content { switch block.Type { + case "thinking": + tb := block.AsThinking() + reasoning.WriteString(tb.Thinking) case "text": tb := block.AsText() content.WriteString(tb.Text) @@ -247,6 +325,7 @@ func parseResponse(resp *anthropic.Message) *LLMResponse { return &LLMResponse{ Content: content.String(), + Reasoning: reasoning.String(), ToolCalls: toolCalls, FinishReason: finishReason, Usage: &UsageInfo{ diff --git a/pkg/providers/anthropic/thinking_test.go b/pkg/providers/anthropic/thinking_test.go new file mode 100644 index 0000000000..e69a3869e3 --- /dev/null +++ b/pkg/providers/anthropic/thinking_test.go @@ -0,0 +1,212 @@ +package anthropicprovider + +import ( + "encoding/json" + "testing" + + "github.com/anthropics/anthropic-sdk-go" +) + +func TestApplyThinkingConfig_Adaptive(t *testing.T) { + params := anthropic.MessageNewParams{ + MaxTokens: 16000, + Temperature: 
anthropic.Float(0.7), + } + applyThinkingConfig(&params, "adaptive") + + if params.Thinking.OfAdaptive == nil { + t.Fatal("expected adaptive thinking") + } + if params.Thinking.OfEnabled != nil { + t.Error("should not set enabled thinking in adaptive mode") + } + if params.OutputConfig.Effort != anthropic.OutputConfigEffortHigh { + t.Errorf("effort = %q, want %q", params.OutputConfig.Effort, anthropic.OutputConfigEffortHigh) + } + if params.Temperature.Valid() { + t.Error("temperature should be cleared when thinking is enabled") + } +} + +func TestApplyThinkingConfig_BudgetLevels(t *testing.T) { + tests := []struct { + level string + wantBudget int64 + }{ + {"low", 4096}, + {"medium", 16384}, + {"high", 32000}, + {"xhigh", 64000}, + } + + for _, tt := range tests { + t.Run(tt.level, func(t *testing.T) { + params := anthropic.MessageNewParams{ + MaxTokens: 200000, + Temperature: anthropic.Float(0.5), + } + applyThinkingConfig(&params, tt.level) + + if params.Thinking.OfEnabled == nil { + t.Fatal("expected enabled thinking") + } + if params.Thinking.OfAdaptive != nil { + t.Error("should not set adaptive thinking") + } + if params.Thinking.OfEnabled.BudgetTokens != tt.wantBudget { + t.Errorf("budget_tokens = %d, want %d", params.Thinking.OfEnabled.BudgetTokens, tt.wantBudget) + } + if params.OutputConfig.Effort != "" { + t.Errorf("effort = %q, want empty", params.OutputConfig.Effort) + } + if params.Temperature.Valid() { + t.Error("temperature should be cleared when thinking is enabled") + } + }) + } +} + +func TestApplyThinkingConfig_BudgetClamp(t *testing.T) { + // budget_tokens must be < max_tokens; clamp budget down to respect user's max_tokens. 
+ params := anthropic.MessageNewParams{MaxTokens: 4096} + applyThinkingConfig(&params, "high") // budget=32000 > maxTokens=4096 + + if params.Thinking.OfEnabled == nil { + t.Fatal("expected enabled thinking") + } + if params.Thinking.OfEnabled.BudgetTokens != 4095 { + t.Errorf("budget_tokens = %d, want 4095 (maxTokens-1)", params.Thinking.OfEnabled.BudgetTokens) + } + if params.MaxTokens != 4096 { + t.Errorf("max_tokens should not be modified, got %d", params.MaxTokens) + } +} + +func TestApplyThinkingConfig_UnknownLevel(t *testing.T) { + params := anthropic.MessageNewParams{MaxTokens: 16000} + applyThinkingConfig(&params, "unknown") + + if params.Thinking.OfEnabled != nil { + t.Error("should not set enabled thinking for unknown level") + } + if params.Thinking.OfAdaptive != nil { + t.Error("should not set adaptive thinking for unknown level") + } +} + +func TestLevelToBudget(t *testing.T) { + tests := []struct { + name string + level string + want int + }{ + {"low", "low", 4096}, + {"medium", "medium", 16384}, + {"high", "high", 32000}, + {"xhigh", "xhigh", 64000}, + {"off", "off", 0}, + {"empty", "", 0}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := levelToBudget(tt.level); got != tt.want { + t.Errorf("levelToBudget(%q) = %d, want %d", tt.level, got, tt.want) + } + }) + } +} + +func TestBuildParams_ThinkingClearsTemperature(t *testing.T) { + msgs := []Message{{Role: "user", Content: "hello"}} + opts := map[string]any{ + "max_tokens": 200000, + "temperature": 0.8, + "thinking_level": "medium", + } + + params, err := buildParams(msgs, nil, "claude-sonnet-4-6", opts) + if err != nil { + t.Fatal(err) + } + + if params.Temperature.Valid() { + t.Error("temperature should be cleared when thinking_level is set") + } + if params.Thinking.OfEnabled == nil { + t.Fatal("expected enabled thinking") + } + if params.Thinking.OfEnabled.BudgetTokens != 16384 { + t.Errorf("budget_tokens = %d, want 16384", params.Thinking.OfEnabled.BudgetTokens) + } 
+} + +// unmarshalBlocks constructs []ContentBlockUnion via JSON round-trip so that +// the internal JSON.raw field is populated (required by AsText/AsThinking). +func unmarshalBlocks(t *testing.T, jsonStr string) []anthropic.ContentBlockUnion { + t.Helper() + var blocks []anthropic.ContentBlockUnion + if err := json.Unmarshal([]byte(jsonStr), &blocks); err != nil { + t.Fatalf("unmarshalBlocks: %v", err) + } + return blocks +} + +func TestParseResponse_ThinkingBlock(t *testing.T) { + resp := &anthropic.Message{ + Content: unmarshalBlocks(t, `[ + {"type":"thinking","thinking":"Let me reason step by step...","signature":"sig"}, + {"type":"text","text":"The answer is 42."} + ]`), + StopReason: anthropic.StopReasonEndTurn, + } + + result := parseResponse(resp) + + if result.Reasoning != "Let me reason step by step..." { + t.Errorf("Reasoning = %q, want thinking content", result.Reasoning) + } + if result.Content != "The answer is 42." { + t.Errorf("Content = %q, want text content", result.Content) + } + if result.FinishReason != "stop" { + t.Errorf("FinishReason = %q, want stop", result.FinishReason) + } +} + +func TestParseResponse_NoThinkingBlock(t *testing.T) { + resp := &anthropic.Message{ + Content: unmarshalBlocks(t, `[ + {"type":"text","text":"Just a normal response."} + ]`), + StopReason: anthropic.StopReasonEndTurn, + } + + result := parseResponse(resp) + + if result.Reasoning != "" { + t.Errorf("Reasoning = %q, want empty", result.Reasoning) + } + if result.Content != "Just a normal response." 
{ + t.Errorf("Content = %q, want text content", result.Content) + } +} + +func TestBuildParams_NoThinkingKeepsTemperature(t *testing.T) { + msgs := []Message{{Role: "user", Content: "hello"}} + opts := map[string]any{ + "temperature": 0.8, + } + + params, err := buildParams(msgs, nil, "claude-sonnet-4-6", opts) + if err != nil { + t.Fatal(err) + } + + if !params.Temperature.Valid() { + t.Error("temperature should be preserved when thinking is not set") + } + if params.Temperature.Value != 0.8 { + t.Errorf("temperature = %f, want 0.8", params.Temperature.Value) + } +} diff --git a/pkg/providers/types.go b/pkg/providers/types.go index f0c168bc6f..68bbd1e658 100644 --- a/pkg/providers/types.go +++ b/pkg/providers/types.go @@ -37,6 +37,13 @@ type StatefulProvider interface { Close() } +// ThinkingCapable is an optional interface for providers that support +// extended thinking (e.g. Anthropic). Used by the agent loop to warn +// when thinking_level is configured but the active provider cannot use it. +type ThinkingCapable interface { + SupportsThinking() bool +} + // FailoverReason classifies why an LLM request failed for fallback decisions. type FailoverReason string