Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion config/config.example.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
"model_name": "claude-sonnet-4.6",
"model": "anthropic/claude-sonnet-4.6",
"api_key": "sk-ant-your-key",
"api_base": "https://api.anthropic.com/v1"
"api_base": "https://api.anthropic.com/v1",
"thinking_level": "high"
},
{
"model_name": "gemini",
Expand Down
8 changes: 8 additions & 0 deletions pkg/agent/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ type AgentInstance struct {
MaxIterations int
MaxTokens int
Temperature float64
ThinkingLevel ThinkingLevel
ContextWindow int
SummarizeMessageThreshold int
SummarizeTokenPercent int
Expand Down Expand Up @@ -103,6 +104,12 @@ func NewAgentInstance(
temperature = *defaults.Temperature
}

var thinkingLevelStr string
if mc, err := cfg.GetModelConfig(model); err == nil {
thinkingLevelStr = mc.ThinkingLevel
}
thinkingLevel := parseThinkingLevel(thinkingLevelStr)

summarizeMessageThreshold := defaults.SummarizeMessageThreshold
if summarizeMessageThreshold == 0 {
summarizeMessageThreshold = 20
Expand Down Expand Up @@ -169,6 +176,7 @@ func NewAgentInstance(
MaxIterations: maxIter,
MaxTokens: maxTokens,
Temperature: temperature,
ThinkingLevel: thinkingLevel,
ContextWindow: maxTokens,
SummarizeMessageThreshold: summarizeMessageThreshold,
SummarizeTokenPercent: summarizeTokenPercent,
Expand Down
34 changes: 18 additions & 16 deletions pkg/agent/loop.go
Original file line number Diff line number Diff line change
Expand Up @@ -771,23 +771,29 @@ func (al *AgentLoop) runLLMIteration(
var response *providers.LLMResponse
var err error

llmOpts := map[string]any{
"max_tokens": agent.MaxTokens,
"temperature": agent.Temperature,
"prompt_cache_key": agent.ID,
}
// parseThinkingLevel guarantees ThinkingOff for empty/unknown values,
// so checking != ThinkingOff is sufficient.
if agent.ThinkingLevel != ThinkingOff {
if tc, ok := agent.Provider.(providers.ThinkingCapable); ok && tc.SupportsThinking() {
llmOpts["thinking_level"] = string(agent.ThinkingLevel)
} else {
logger.WarnCF("agent", "thinking_level is set but current provider does not support it, ignoring",
map[string]any{"agent_id": agent.ID, "thinking_level": string(agent.ThinkingLevel)})
}
}

callLLM := func() (*providers.LLMResponse, error) {
if len(agent.Candidates) > 1 && al.fallback != nil {
fbResult, fbErr := al.fallback.Execute(
ctx,
agent.Candidates,
func(ctx context.Context, provider, model string) (*providers.LLMResponse, error) {
return agent.Provider.Chat(
ctx,
messages,
providerToolDefs,
model,
map[string]any{
"max_tokens": agent.MaxTokens,
"temperature": agent.Temperature,
"prompt_cache_key": agent.ID,
},
)
return agent.Provider.Chat(ctx, messages, providerToolDefs, model, llmOpts)
},
)
if fbErr != nil {
Expand All @@ -803,11 +809,7 @@ func (al *AgentLoop) runLLMIteration(
}
return fbResult.Response, nil
}
return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, map[string]any{
"max_tokens": agent.MaxTokens,
"temperature": agent.Temperature,
"prompt_cache_key": agent.ID,
})
return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, llmOpts)
}

// Retry loop for context/token errors
Expand Down
39 changes: 39 additions & 0 deletions pkg/agent/thinking.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package agent

import "strings"

// ThinkingLevel controls how the provider sends thinking parameters.
//
// - "adaptive": sends {thinking: {type: "adaptive"}} + output_config.effort (Claude 4.6+)
// - "low"/"medium"/"high"/"xhigh": sends {thinking: {type: "enabled", budget_tokens: N}} (all models)
// - "off": disables thinking
type ThinkingLevel string

const (
	ThinkingOff      ThinkingLevel = "off"
	ThinkingLow      ThinkingLevel = "low"
	ThinkingMedium   ThinkingLevel = "medium"
	ThinkingHigh     ThinkingLevel = "high"
	ThinkingXHigh    ThinkingLevel = "xhigh"
	ThinkingAdaptive ThinkingLevel = "adaptive"
)

// parseThinkingLevel normalizes a config string to a ThinkingLevel.
// Matching is case-insensitive and ignores surrounding whitespace so that
// user-facing config values are accepted leniently.
// Unknown or empty values map to ThinkingOff.
func parseThinkingLevel(level string) ThinkingLevel {
	normalized := ThinkingLevel(strings.ToLower(strings.TrimSpace(level)))
	switch normalized {
	case ThinkingLow, ThinkingMedium, ThinkingHigh, ThinkingXHigh, ThinkingAdaptive:
		return normalized
	default:
		// Covers "off" itself plus anything unrecognized.
		return ThinkingOff
	}
}
35 changes: 35 additions & 0 deletions pkg/agent/thinking_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package agent

import "testing"

// TestParseThinkingLevel verifies that every recognized level parses to its
// constant, that unknown/empty inputs fall back to ThinkingOff, and that
// parsing tolerates mixed case and surrounding whitespace.
func TestParseThinkingLevel(t *testing.T) {
	cases := map[string]struct {
		input string
		want  ThinkingLevel
	}{
		"off":      {"off", ThinkingOff},
		"empty":    {"", ThinkingOff},
		"low":      {"low", ThinkingLow},
		"medium":   {"medium", ThinkingMedium},
		"high":     {"high", ThinkingHigh},
		"xhigh":    {"xhigh", ThinkingXHigh},
		"adaptive": {"adaptive", ThinkingAdaptive},
		"unknown":  {"unknown", ThinkingOff},
		// Case-insensitive and whitespace-tolerant.
		"upper_Medium":   {"Medium", ThinkingMedium},
		"upper_HIGH":     {"HIGH", ThinkingHigh},
		"mixed_Adaptive": {"Adaptive", ThinkingAdaptive},
		"leading_space":  {" high", ThinkingHigh},
		"trailing_space": {"low ", ThinkingLow},
		"both_spaces":    {" medium ", ThinkingMedium},
	}

	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			if got := parseThinkingLevel(tc.input); got != tc.want {
				t.Errorf("parseThinkingLevel(%q) = %q, want %q", tc.input, got, tc.want)
			}
		})
	}
}
1 change: 1 addition & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,7 @@ type ModelConfig struct {
RPM int `json:"rpm,omitempty"` // Requests per minute limit
MaxTokensField string `json:"max_tokens_field,omitempty"` // Field name for max tokens (e.g., "max_completion_tokens")
RequestTimeout int `json:"request_timeout,omitempty"`
ThinkingLevel string `json:"thinking_level,omitempty"` // Extended thinking: off|low|medium|high|xhigh|adaptive
}

// Validate checks if the ModelConfig has all required fields.
Expand Down
79 changes: 79 additions & 0 deletions pkg/providers/anthropic/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ type Provider struct {
baseURL string
}

// SupportsThinking implements providers.ThinkingCapable. This provider
// unconditionally reports thinking support; the per-level parameter
// translation happens later when the request is built.
func (p *Provider) SupportsThinking() bool { return true }

func NewProvider(token string) *Provider {
return NewProviderWithBaseURL(token, "")
}
Expand Down Expand Up @@ -182,9 +185,80 @@ func buildParams(
params.Tools = translateTools(tools)
}

// Extended Thinking / Adaptive Thinking
// The thinking_level value directly determines the API parameter format:
// "adaptive" β†’ {thinking: {type: "adaptive"}} + output_config.effort
// "low/medium/high/xhigh" β†’ {thinking: {type: "enabled", budget_tokens: N}}
if level, ok := options["thinking_level"].(string); ok && level != "" && level != "off" {
applyThinkingConfig(&params, level)
}

return params, nil
}

// applyThinkingConfig sets thinking parameters based on the level value.
// "adaptive" uses the adaptive thinking API (Claude 4.6+).
// All other levels use budget_tokens which is universally supported.
//
// Anthropic API constraint: temperature must not be set when thinking is enabled.
// budget_tokens must be strictly less than max_tokens.
// applyThinkingConfig sets thinking parameters on params based on the level value.
// "adaptive" uses the adaptive thinking API (Claude 4.6+); all other levels use
// budget_tokens, which is universally supported.
//
// Anthropic API constraints handled here:
//   - temperature must not be set alongside thinking, so it is cleared only
//     when thinking is actually enabled (the original cleared it even when the
//     level produced no thinking config, silently losing the user's setting);
//   - budget_tokens must be strictly less than max_tokens AND at least 1024.
//     After clamping to max_tokens-1 the budget can fall below that floor (or
//     to <= 0 for a tiny max_tokens); sending such a request would be rejected,
//     so thinking is disabled instead.
func applyThinkingConfig(params *anthropic.MessageNewParams, level string) {
	if level == "adaptive" {
		clearTemperatureForThinking(params, level)
		adaptive := anthropic.NewThinkingConfigAdaptiveParam()
		params.Thinking = anthropic.ThinkingConfigParamUnion{OfAdaptive: &adaptive}
		params.OutputConfig = anthropic.OutputConfigParam{
			Effort: anthropic.OutputConfigEffortHigh,
		}
		return
	}

	budget := int64(levelToBudget(level))
	if budget <= 0 {
		// Unknown level: leave the request untouched (no thinking, temperature kept).
		return
	}

	// budget_tokens must be < max_tokens; clamp to respect user's max_tokens setting.
	if budget >= params.MaxTokens {
		log.Printf("anthropic: budget_tokens (%d) clamped to %d (max_tokens-1)", budget, params.MaxTokens-1)
		budget = params.MaxTokens - 1
	} else if budget > params.MaxTokens*80/100 {
		log.Printf("anthropic: thinking budget (%d) exceeds 80%% of max_tokens (%d), output may be truncated",
			budget, params.MaxTokens)
	}

	// The API rejects budget_tokens below 1024; bail out rather than send a
	// request that is guaranteed to fail.
	const minThinkingBudget = 1024
	if budget < minThinkingBudget {
		log.Printf("anthropic: thinking disabled: max_tokens (%d) leaves no room for the minimum thinking budget (%d)",
			params.MaxTokens, minThinkingBudget)
		return
	}

	clearTemperatureForThinking(params, level)
	params.Thinking = anthropic.ThinkingConfigParamOfEnabled(budget)
}

// clearTemperatureForThinking resets temperature to its zero value (omitted
// from JSON serialization) because the Anthropic API rejects requests that set
// temperature alongside thinking.
func clearTemperatureForThinking(params *anthropic.MessageNewParams, level string) {
	if params.Temperature.Valid() {
		log.Printf("anthropic: temperature cleared because thinking is enabled (level=%s)", level)
	}
	params.Temperature = anthropic.MessageNewParams{}.Temperature
}

// thinkingBudgets maps a thinking level to its budget_tokens value.
// Values are based on Anthropic's recommendations and community best practices:
//
//	low    =  4,096 — simple reasoning, quick debugging (Claude Code "think")
//	medium = 16,384 — Anthropic recommended sweet spot for most tasks
//	high   = 32,000 — complex architecture, deep analysis (diminishing returns above this)
//	xhigh  = 64,000 — extreme reasoning, research problems, benchmarks
var thinkingBudgets = map[string]int{
	"low":    4096,
	"medium": 16384,
	"high":   32000,
	"xhigh":  64000,
}

// levelToBudget returns the budget_tokens for a thinking level, or 0 for any
// level without a fixed budget (including "off", "adaptive", and unknowns).
//
// Note: For Claude 4.6+, prefer adaptive thinking over manual budget_tokens.
func levelToBudget(level string) int {
	return thinkingBudgets[level]
}

func translateTools(tools []ToolDefinition) []anthropic.ToolUnionParam {
result := make([]anthropic.ToolUnionParam, 0, len(tools))
for _, t := range tools {
Expand Down Expand Up @@ -213,10 +287,14 @@ func translateTools(tools []ToolDefinition) []anthropic.ToolUnionParam {

func parseResponse(resp *anthropic.Message) *LLMResponse {
var content strings.Builder
var reasoning strings.Builder
var toolCalls []ToolCall

for _, block := range resp.Content {
switch block.Type {
case "thinking":
tb := block.AsThinking()
reasoning.WriteString(tb.Thinking)
case "text":
tb := block.AsText()
content.WriteString(tb.Text)
Expand Down Expand Up @@ -247,6 +325,7 @@ func parseResponse(resp *anthropic.Message) *LLMResponse {

return &LLMResponse{
Content: content.String(),
Reasoning: reasoning.String(),
ToolCalls: toolCalls,
FinishReason: finishReason,
Usage: &UsageInfo{
Expand Down
Loading