Skip to content
29 changes: 29 additions & 0 deletions pkg/agent/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ type AgentInstance struct {
Subagents *config.SubagentsConfig
SkillsFilter []string
Candidates []providers.FallbackCandidate

// Router is non-nil when model routing is configured and the light model
// was successfully resolved. It scores each incoming message and decides
// whether to route to LightCandidates or stay with Candidates.
Router *routing.Router
// LightCandidates holds the resolved provider candidates for the light model.
// Pre-computed at agent creation to avoid repeated model_list lookups at runtime.
LightCandidates []providers.FallbackCandidate
}

// NewAgentInstance creates an agent instance from config.
Expand Down Expand Up @@ -180,6 +188,25 @@ func NewAgentInstance(

candidates := providers.ResolveCandidatesWithLookup(modelCfg, defaults.Provider, resolveFromModelList)

// Model routing setup: pre-resolve light model candidates at creation time
// to avoid repeated model_list lookups on every incoming message.
var router *routing.Router
var lightCandidates []providers.FallbackCandidate
if rc := defaults.Routing; rc != nil && rc.Enabled && rc.LightModel != "" {
lightModelCfg := providers.ModelConfig{Primary: rc.LightModel}
resolved := providers.ResolveCandidatesWithLookup(lightModelCfg, defaults.Provider, resolveFromModelList)
if len(resolved) > 0 {
router = routing.New(routing.RouterConfig{
LightModel: rc.LightModel,
Threshold: rc.Threshold,
})
lightCandidates = resolved
} else {
log.Printf("routing: light_model %q not found in model_list — routing disabled for agent %q",
rc.LightModel, agentID)
}
}

return &AgentInstance{
ID: agentID,
Name: agentName,
Expand All @@ -200,6 +227,8 @@ func NewAgentInstance(
Subagents: subagents,
SkillsFilter: skillsFilter,
Candidates: candidates,
Router: router,
LightCandidates: lightCandidates,
}
}

Expand Down
54 changes: 49 additions & 5 deletions pkg/agent/loop.go
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,12 @@ func (al *AgentLoop) runLLMIteration(
iteration := 0
var finalContent string

// Determine effective model tier for this conversation turn.
// selectCandidates evaluates routing once and the decision is sticky for
// all tool-follow-up iterations within the same turn so that a multi-step
// tool chain doesn't switch models mid-way through.
activeCandidates, activeModel := al.selectCandidates(agent, opts.UserMessage, messages)

for iteration < agent.MaxIterations {
iteration++

Expand All @@ -842,7 +848,7 @@ func (al *AgentLoop) runLLMIteration(
map[string]any{
"agent_id": agent.ID,
"iteration": iteration,
"model": agent.Model,
"model": activeModel,
"messages_count": len(messages),
"tools_count": len(providerToolDefs),
"max_tokens": agent.MaxTokens,
Expand All @@ -858,7 +864,7 @@ func (al *AgentLoop) runLLMIteration(
"tools_json": formatToolsForLog(providerToolDefs),
})

// Call LLM with fallback chain if candidates are configured.
// Call LLM with fallback chain if multiple candidates are configured.
var response *providers.LLMResponse
var err error

Expand All @@ -879,10 +885,10 @@ func (al *AgentLoop) runLLMIteration(
}

callLLM := func() (*providers.LLMResponse, error) {
if len(agent.Candidates) > 1 && al.fallback != nil {
if len(activeCandidates) > 1 && al.fallback != nil {
fbResult, fbErr := al.fallback.Execute(
ctx,
agent.Candidates,
activeCandidates,
func(ctx context.Context, provider, model string) (*providers.LLMResponse, error) {
return agent.Provider.Chat(ctx, messages, providerToolDefs, model, llmOpts)
},
Expand All @@ -900,7 +906,7 @@ func (al *AgentLoop) runLLMIteration(
}
return fbResult.Response, nil
}
return agent.Provider.Chat(ctx, messages, providerToolDefs, agent.Model, llmOpts)
return agent.Provider.Chat(ctx, messages, providerToolDefs, activeModel, llmOpts)
}

// Retry loop for context/token errors
Expand Down Expand Up @@ -1169,6 +1175,44 @@ func (al *AgentLoop) runLLMIteration(
return finalContent, iteration, nil
}

// selectCandidates returns the model candidates and resolved model name to use
// for a conversation turn. When model routing is configured and the incoming
// message scores below the complexity threshold, it returns the light model
// candidates instead of the primary ones.
//
// The returned (candidates, model) pair is used for all LLM calls within one
// turn — tool follow-up iterations use the same tier as the initial call so
// that a multi-step tool chain doesn't switch models mid-way.
func (al *AgentLoop) selectCandidates(
agent *AgentInstance,
userMsg string,
history []providers.Message,
) (candidates []providers.FallbackCandidate, model string) {
if agent.Router == nil || len(agent.LightCandidates) == 0 {
return agent.Candidates, agent.Model
}

_, usedLight, score := agent.Router.SelectModel(userMsg, history, agent.Model)
if !usedLight {
logger.DebugCF("agent", "Model routing: primary model selected",
map[string]any{
"agent_id": agent.ID,
"score": score,
"threshold": agent.Router.Threshold(),
})
return agent.Candidates, agent.Model
}

logger.InfoCF("agent", "Model routing: light model selected",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Routing decision log is missing the actual score

The log entry includes agent_id, light_model, and threshold but not the computed score. When debugging why a particular message was routed to the light (or heavy) model, the score is the single most important piece of information.

Also, there is no log line at all when the primary model is selected. For routing observability, it would be helpful to log both paths — at least at debug level for the primary-model case.

Suggestion: Have SelectModel return the score as a third value:

func (r *Router) SelectModel(
    msg string,
    history []providers.Message,
    primaryModel string,
) (model string, usedLight bool, score float64) {
    features := ExtractFeatures(msg, history)
    score = r.classifier.Score(features)
    if score < r.cfg.Threshold {
        return r.cfg.LightModel, true, score
    }
    return primaryModel, false, score
}

Then include it in the log:

logger.InfoCF("agent", "Model routing: light model selected",
    map[string]any{
        "agent_id":    agent.ID,
        "light_model": agent.Router.LightModel(),
        "threshold":   agent.Router.Threshold(),
        "score":       score,  // <-- critical for debugging
    })

This also avoids the current issue where selectCandidates discards the first return value from SelectModel (line 1195).

map[string]any{
"agent_id": agent.ID,
"light_model": agent.Router.LightModel(),
"score": score,
"threshold": agent.Router.Threshold(),
})
return agent.LightCandidates, agent.Router.LightModel()
}

// maybeSummarize triggers summarization if the session history exceeds thresholds.
func (al *AgentLoop) maybeSummarize(agent *AgentInstance, sessionKey, channel, chatID string) {
newHistory := agent.Sessions.GetHistory(sessionKey)
Expand Down
43 changes: 28 additions & 15 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,22 +167,35 @@ type SessionConfig struct {
IdentityLinks map[string][]string `json:"identity_links,omitempty"`
}

// RoutingConfig controls the intelligent model routing feature.
// When enabled, each incoming message is scored against structural features
// (message length, code blocks, tool call history, conversation depth, attachments).
// Messages scoring below Threshold are sent to LightModel; all others use the
// agent's primary model. This reduces cost and latency for simple tasks without
// requiring any keyword matching — all scoring is language-agnostic.
type RoutingConfig struct {
	// Enabled turns routing on. Routing additionally requires LightModel to be
	// set and resolvable from model_list; otherwise it is silently disabled.
	Enabled    bool    `json:"enabled"`
	LightModel string  `json:"light_model"` // model_name from model_list to use for simple tasks
	Threshold  float64 `json:"threshold"`   // complexity score in [0,1]; score >= threshold → primary model
}

type AgentDefaults struct {
Workspace string `json:"workspace" env:"PICOCLAW_AGENTS_DEFAULTS_WORKSPACE"`
RestrictToWorkspace bool `json:"restrict_to_workspace" env:"PICOCLAW_AGENTS_DEFAULTS_RESTRICT_TO_WORKSPACE"`
AllowReadOutsideWorkspace bool `json:"allow_read_outside_workspace" env:"PICOCLAW_AGENTS_DEFAULTS_ALLOW_READ_OUTSIDE_WORKSPACE"`
Provider string `json:"provider" env:"PICOCLAW_AGENTS_DEFAULTS_PROVIDER"`
ModelName string `json:"model_name,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_MODEL_NAME"`
Model string `json:"model" env:"PICOCLAW_AGENTS_DEFAULTS_MODEL"` // Deprecated: use model_name instead
ModelFallbacks []string `json:"model_fallbacks,omitempty"`
ImageModel string `json:"image_model,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_IMAGE_MODEL"`
ImageModelFallbacks []string `json:"image_model_fallbacks,omitempty"`
MaxTokens int `json:"max_tokens" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOKENS"`
Temperature *float64 `json:"temperature,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_TEMPERATURE"`
MaxToolIterations int `json:"max_tool_iterations" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOOL_ITERATIONS"`
SummarizeMessageThreshold int `json:"summarize_message_threshold" env:"PICOCLAW_AGENTS_DEFAULTS_SUMMARIZE_MESSAGE_THRESHOLD"`
SummarizeTokenPercent int `json:"summarize_token_percent" env:"PICOCLAW_AGENTS_DEFAULTS_SUMMARIZE_TOKEN_PERCENT"`
MaxMediaSize int `json:"max_media_size,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_MEDIA_SIZE"`
Workspace string `json:"workspace" env:"PICOCLAW_AGENTS_DEFAULTS_WORKSPACE"`
RestrictToWorkspace bool `json:"restrict_to_workspace" env:"PICOCLAW_AGENTS_DEFAULTS_RESTRICT_TO_WORKSPACE"`
AllowReadOutsideWorkspace bool `json:"allow_read_outside_workspace" env:"PICOCLAW_AGENTS_DEFAULTS_ALLOW_READ_OUTSIDE_WORKSPACE"`
Provider string `json:"provider" env:"PICOCLAW_AGENTS_DEFAULTS_PROVIDER"`
ModelName string `json:"model_name,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_MODEL_NAME"`
Model string `json:"model" env:"PICOCLAW_AGENTS_DEFAULTS_MODEL"` // Deprecated: use model_name instead
ModelFallbacks []string `json:"model_fallbacks,omitempty"`
ImageModel string `json:"image_model,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_IMAGE_MODEL"`
ImageModelFallbacks []string `json:"image_model_fallbacks,omitempty"`
MaxTokens int `json:"max_tokens" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOKENS"`
Temperature *float64 `json:"temperature,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_TEMPERATURE"`
MaxToolIterations int `json:"max_tool_iterations" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOOL_ITERATIONS"`
SummarizeMessageThreshold int `json:"summarize_message_threshold" env:"PICOCLAW_AGENTS_DEFAULTS_SUMMARIZE_MESSAGE_THRESHOLD"`
SummarizeTokenPercent int `json:"summarize_token_percent" env:"PICOCLAW_AGENTS_DEFAULTS_SUMMARIZE_TOKEN_PERCENT"`
MaxMediaSize int `json:"max_media_size,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_MEDIA_SIZE"`
Routing *RoutingConfig `json:"routing,omitempty"`
}

const DefaultMaxMediaSize = 20 * 1024 * 1024 // 20 MB
Expand Down
80 changes: 80 additions & 0 deletions pkg/routing/classifier.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package routing

// Classifier evaluates a feature set and returns a complexity score in [0, 1].
// A higher score indicates a more complex task that benefits from a heavy model.
// The score is compared against the configured threshold: score >= threshold selects
// the primary (heavy) model; score < threshold selects the light model.
//
// Classifier is an interface so that future implementations (ML-based, embedding-based,
// or any other approach) can be swapped in without changing routing infrastructure.
type Classifier interface {
	// Score returns the complexity score for f. Implementations must keep the
	// result within [0, 1] so callers can compare it against the threshold.
	Score(f Features) float64
}

// RuleClassifier is the v1 implementation.
// It uses a weighted sum of structural signals with no external dependencies,
// no API calls, and sub-microsecond latency. The raw sum is capped at 1.0 so
// that the returned score always falls within the [0, 1] contract.
//
// Individual weights (multiple signals can fire simultaneously):
//
//	token > 200 (≈600 chars):  0.35 — very long prompts are almost always complex
//	token 51–200:              0.15 — medium length; may or may not be complex
//	code block present:        0.40 — coding tasks need the heavy model
//	tool calls > 3 (recent):   0.25 — dense tool usage signals an agentic workflow
//	tool calls 1–3 (recent):   0.10 — some tool activity
//	conversation depth > 10:   0.10 — long sessions carry implicit complexity
//	attachments present:       1.00 — hard gate; multi-modal always needs heavy model
//
// With the default threshold of 0.35:
//   - Pure greetings / trivial Q&A: 0.00 → light ✓
//   - Medium prose message (51–200 tokens): 0.15 → light ✓
//   - Message with code block: 0.40 → heavy ✓
//   - Long message (>200 tokens): 0.35 → heavy ✓
//   - Active tool session + medium message: 0.25 → light (acceptable)
//   - Any message with an image/audio attachment: 1.00 → heavy ✓
type RuleClassifier struct{}

// Score computes the complexity score for the given feature set.
// The returned value is always within [0, 1]; any input carrying attachments
// short-circuits to the maximum score of 1.0.
func (c *RuleClassifier) Score(f Features) float64 {
	// Hard gate: multi-modal inputs are never routed to the light model.
	if f.HasAttachments {
		return 1.0
	}

	total := 0.0

	// Verbosity — the primary signal: longer prompts tend to describe
	// more complex tasks.
	if f.TokenEstimate > 200 {
		total += 0.35
	} else if f.TokenEstimate > 50 {
		total += 0.15
	}

	// A fenced code block is the strongest single indicator of a
	// coding/technical task.
	if f.CodeBlockCount > 0 {
		total += 0.40
	}

	// Recent tool call density — ongoing tool usage suggests an agentic,
	// multi-step workflow.
	if f.RecentToolCalls > 3 {
		total += 0.25
	} else if f.RecentToolCalls > 0 {
		total += 0.10
	}

	// Deep conversations carry accumulated context and implicit complexity.
	if f.ConversationDepth > 10 {
		total += 0.10
	}

	// Several signals can fire at once (e.g. long message + code block +
	// tool chain = 1.10 raw); clamp to honor the [0, 1] contract.
	if total > 1.0 {
		return 1.0
	}
	return total
}
Loading
Loading