From 6689c0b1c068a8149cb801b39cba959948a88272 Mon Sep 17 00:00:00 2001 From: shikihane Date: Tue, 3 Mar 2026 13:18:21 +0800 Subject: [PATCH 1/7] feat(providers): add Media field to Message struct for vision support Co-Authored-By: Claude Opus 4.6 --- pkg/providers/protocoltypes/types.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/providers/protocoltypes/types.go b/pkg/providers/protocoltypes/types.go index 99f13334e7..194c1aa6fd 100644 --- a/pkg/providers/protocoltypes/types.go +++ b/pkg/providers/protocoltypes/types.go @@ -65,6 +65,7 @@ type ContentBlock struct { type Message struct { Role string `json:"role"` Content string `json:"content"` + Media []string `json:"media,omitempty"` ReasoningContent string `json:"reasoning_content,omitempty"` SystemParts []ContentBlock `json:"system_parts,omitempty"` // structured system blocks for cache-aware adapters ToolCalls []ToolCall `json:"tool_calls,omitempty"` From 4c6c05a251895ad28cb7329664a3d028aa3d5948 Mon Sep 17 00:00:00 2001 From: shikihane Date: Tue, 3 Mar 2026 13:20:44 +0800 Subject: [PATCH 2/7] feat(config): add configurable max_media_size with 20MB default Co-Authored-By: Claude Opus 4.6 --- pkg/config/config.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pkg/config/config.go b/pkg/config/config.go index 305ae67e36..eeca9638ef 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -180,6 +180,16 @@ type AgentDefaults struct { MaxTokens int `json:"max_tokens" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOKENS"` Temperature *float64 `json:"temperature,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_TEMPERATURE"` MaxToolIterations int `json:"max_tool_iterations" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_TOOL_ITERATIONS"` + MaxMediaSize int `json:"max_media_size,omitempty" env:"PICOCLAW_AGENTS_DEFAULTS_MAX_MEDIA_SIZE"` +} + +const DefaultMaxMediaSize = 20 * 1024 * 1024 // 20 MB + +func (d *AgentDefaults) GetMaxMediaSize() int { + if d.MaxMediaSize > 0 { + return d.MaxMediaSize + } + return DefaultMaxMediaSize } // GetModelName returns the effective model name for the agent defaults. From 559cef3d5b27d8f5f13cc839c43dac91f853041c Mon Sep 17 00:00:00 2001 From: shikihane Date: Tue, 3 Mar 2026 13:23:18 +0800 Subject: [PATCH 3/7] chore: add h2non/filetype dependency for magic-bytes MIME detection Co-Authored-By: Claude Opus 4.6 --- go.mod | 2 ++ go.sum | 2 ++ 2 files changed, 4 insertions(+) diff --git a/go.mod b/go.mod index 1c699a7243..c1172937cb 100644 --- a/go.mod +++ b/go.mod @@ -37,6 +37,8 @@ require ( github.com/dustin/go-humanize v1.0.1 // indirect github.com/elliotchance/orderedmap/v3 v3.1.0 // indirect github.com/gdamore/encoding v1.0.1 // indirect + github.com/gdamore/tcell/v2 v2.13.8 // indirect + github.com/h2non/filetype v1.1.3 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/lucasb-eyer/go-colorful v1.3.0 // indirect github.com/mattn/go-colorable v0.1.14 // indirect diff --git a/go.sum b/go.sum index 9041826a5c..060594d06b 100644 --- a/go.sum +++ b/go.sum @@ -98,6 +98,8 @@ github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aN github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grbit/go-json v0.11.0 h1:bAbyMdYrYl/OjYsSqLH99N2DyQ291mHy726Mx+sYrnc= github.com/grbit/go-json v0.11.0/go.mod h1:IYpHsdybQ386+6g3VE6AXQ3uTGa5mquBme5/ZWmtzek= +github.com/h2non/filetype v1.1.3 h1:FKkx9QbD7HR/zjK1Ia5XiBsq9zdLi5Kf3zGyFTAFkGg= +github.com/h2non/filetype v1.1.3/go.mod h1:319b3zT68BvV+WRj7cwy856M2ehB3HqNOt6sy1HndBY= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= From 6fd65825e731cf27a24b477bb5516dd2f2ea52b2 Mon Sep 17 00:00:00 2001 From: shikihane Date: Tue, 3 Mar 2026 14:01:52 +0800 Subject: [PATCH 4/7] feat(agent): implement resolveMediaRefs with streaming base64 and filetype detection Co-Authored-By: Claude Opus 4.6 --- pkg/agent/loop_media.go | 121 ++++++++++++++++++++++++++++++++++ pkg/agent/loop_test.go | 140 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 261 insertions(+) create mode 100644 pkg/agent/loop_media.go diff --git a/pkg/agent/loop_media.go b/pkg/agent/loop_media.go new file mode 100644 index 0000000000..813feef696 --- /dev/null +++ b/pkg/agent/loop_media.go @@ -0,0 +1,121 @@ +// PicoClaw - Ultra-lightweight personal AI agent +// Inspired by and based on nanobot: https://github.com/HKUDS/nanobot +// License: MIT +// +// Copyright (c) 2026 PicoClaw contributors + +package agent + +import ( + "bytes" + "encoding/base64" + "io" + "os" + "strings" + + "github.com/h2non/filetype" + "github.com/sipeed/picoclaw/pkg/logger" + "github.com/sipeed/picoclaw/pkg/media" + "github.com/sipeed/picoclaw/pkg/providers" +) + +// resolveMediaRefs replaces media:// refs in message Media fields with base64 data URLs. +// Uses streaming base64 encoding (file handle → encoder → buffer) to avoid holding +// both raw bytes and encoded string in memory simultaneously. +// Returns a new slice; original messages are not mutated. +func resolveMediaRefs(messages []providers.Message, store media.MediaStore, maxSize int) []providers.Message { + if store == nil { + return messages + } + + result := make([]providers.Message, len(messages)) + copy(result, messages) + + for i, m := range result { + if len(m.Media) == 0 { + continue + } + + resolved := make([]string, 0, len(m.Media)) + for _, ref := range m.Media { + if !strings.HasPrefix(ref, "media://") { + resolved = append(resolved, ref) + continue + } + + localPath, meta, err := store.ResolveWithMeta(ref) + if err != nil { + logger.WarnCF("agent", "Failed to resolve media ref", map[string]any{ + "ref": ref, + "error": err.Error(), + }) + continue + } + + info, err := os.Stat(localPath) + if err != nil { + logger.WarnCF("agent", "Failed to stat media file", map[string]any{ + "path": localPath, + "error": err.Error(), + }) + continue + } + if info.Size() > int64(maxSize) { + logger.WarnCF("agent", "Media file too large, skipping", map[string]any{ + "path": localPath, + "size": info.Size(), + "max_size": maxSize, + }) + continue + } + + // Determine MIME type: prefer metadata, fallback to magic-bytes detection + mime := meta.ContentType + if mime == "" { + kind, err := filetype.MatchFile(localPath) + if err != nil || kind == filetype.Unknown { + logger.WarnCF("agent", "Unknown media type, skipping", map[string]any{ + "path": localPath, + }) + continue + } + mime = kind.MIME.Value + } + + // Streaming base64: open file → base64 encoder → buffer + // Peak memory: ~1.33x file size (buffer only, no raw bytes copy) + f, err := os.Open(localPath) + if err != nil { + logger.WarnCF("agent", "Failed to open media file", map[string]any{ + "path": localPath, + "error": err.Error(), + }) + continue + } + + prefix := "data:" + mime + ";base64," + encodedLen := base64.StdEncoding.EncodedLen(int(info.Size())) + var buf bytes.Buffer + buf.Grow(len(prefix) + encodedLen) + buf.WriteString(prefix) + + encoder := base64.NewEncoder(base64.StdEncoding, &buf) + if _, err := io.Copy(encoder, f); err != nil { + f.Close() + logger.WarnCF("agent", "Failed to encode media file", map[string]any{ + "path": localPath, + "error": err.Error(), + }) + continue + } + encoder.Close() + f.Close() + + resolved = append(resolved, buf.String()) + } + + result[i].Media = resolved + } + + return result +} diff --git a/pkg/agent/loop_test.go b/pkg/agent/loop_test.go index 3565314fea..4076c6e7c4 100644 --- a/pkg/agent/loop_test.go +++ b/pkg/agent/loop_test.go @@ -6,12 +6,14 @@ import ( "os" "path/filepath" "slices" + "strings" "testing" "time" "github.com/sipeed/picoclaw/pkg/bus" "github.com/sipeed/picoclaw/pkg/channels" "github.com/sipeed/picoclaw/pkg/config" + "github.com/sipeed/picoclaw/pkg/media" "github.com/sipeed/picoclaw/pkg/providers" "github.com/sipeed/picoclaw/pkg/tools" ) @@ -808,3 +810,141 @@ func TestHandleReasoning(t *testing.T) { } }) } + +func TestResolveMediaRefs_ResolvesToBase64(t *testing.T) { + store := media.NewFileMediaStore() + dir := t.TempDir() + + // Create a minimal valid PNG (8-byte header is enough for filetype detection) + pngPath := filepath.Join(dir, "test.png") + // PNG magic: 0x89 P N G \r \n 0x1A \n + minimal IHDR + pngHeader := []byte{ + 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG signature + 0x00, 0x00, 0x00, 0x0D, // IHDR length + 0x49, 0x48, 0x44, 0x52, // "IHDR" + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x08, 0x02, // 1x1 RGB + 0x00, 0x00, 0x00, // no interlace + 0x90, 0x77, 0x53, 0xDE, // CRC + } + if err := os.WriteFile(pngPath, pngHeader, 0o644); err != nil { + t.Fatal(err) + } + ref, err := store.Store(pngPath, media.MediaMeta{}, "test") + if err != nil { + t.Fatal(err) + } + + messages := []providers.Message{ + {Role: "user", Content: "describe this", Media: []string{ref}}, + } + result := resolveMediaRefs(messages, store, config.DefaultMaxMediaSize) + + if len(result[0].Media) != 1 { + t.Fatalf("expected 1 resolved media, got %d", len(result[0].Media)) + } + if !strings.HasPrefix(result[0].Media[0], "data:image/png;base64,") { + t.Fatalf("expected data:image/png;base64, prefix, got %q", result[0].Media[0][:40]) + } +} + +func TestResolveMediaRefs_SkipsOversizedFile(t *testing.T) { + store := media.NewFileMediaStore() + dir := t.TempDir() + + bigPath := filepath.Join(dir, "big.png") + // Write PNG header + padding to exceed limit + data := make([]byte, 1024+1) // 1KB + 1 byte + copy(data, []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}) + if err := os.WriteFile(bigPath, data, 0o644); err != nil { + t.Fatal(err) + } + ref, _ := store.Store(bigPath, media.MediaMeta{}, "test") + + messages := []providers.Message{ + {Role: "user", Content: "hi", Media: []string{ref}}, + } + // Use a tiny limit (1KB) so the file is oversized + result := resolveMediaRefs(messages, store, 1024) + + if len(result[0].Media) != 0 { + t.Fatalf("expected 0 media (oversized), got %d", len(result[0].Media)) + } +} + +func TestResolveMediaRefs_SkipsUnknownType(t *testing.T) { + store := media.NewFileMediaStore() + dir := t.TempDir() + + txtPath := filepath.Join(dir, "readme.txt") + if err := os.WriteFile(txtPath, []byte("hello world"), 0o644); err != nil { + t.Fatal(err) + } + ref, _ := store.Store(txtPath, media.MediaMeta{}, "test") + + messages := []providers.Message{ + {Role: "user", Content: "hi", Media: []string{ref}}, + } + result := resolveMediaRefs(messages, store, config.DefaultMaxMediaSize) + + if len(result[0].Media) != 0 { + t.Fatalf("expected 0 media (unknown type), got %d", len(result[0].Media)) + } +} + +func TestResolveMediaRefs_PassesThroughNonMediaRefs(t *testing.T) { + messages := []providers.Message{ + {Role: "user", Content: "hi", Media: []string{"https://example.com/img.png"}}, + } + result := resolveMediaRefs(messages, nil, config.DefaultMaxMediaSize) + + if len(result[0].Media) != 1 || result[0].Media[0] != "https://example.com/img.png" { + t.Fatalf("expected passthrough of non-media:// URL, got %v", result[0].Media) + } +} + +func TestResolveMediaRefs_DoesNotMutateOriginal(t *testing.T) { + store := media.NewFileMediaStore() + dir := t.TempDir() + pngPath := filepath.Join(dir, "test.png") + pngHeader := []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, + 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x08, 0x02, + 0x00, 0x00, 0x00, 0x90, 0x77, 0x53, 0xDE} + os.WriteFile(pngPath, pngHeader, 0o644) + ref, _ := store.Store(pngPath, media.MediaMeta{}, "test") + + original := []providers.Message{ + {Role: "user", Content: "hi", Media: []string{ref}}, + } + originalRef := original[0].Media[0] + + resolveMediaRefs(original, store, config.DefaultMaxMediaSize) + + if original[0].Media[0] != originalRef { + t.Fatal("resolveMediaRefs mutated original message slice") + } +} + +func TestResolveMediaRefs_UsesMetaContentType(t *testing.T) { + store := media.NewFileMediaStore() + dir := t.TempDir() + + // File with JPEG content but stored with explicit content type + jpegPath := filepath.Join(dir, "photo") + jpegHeader := []byte{0xFF, 0xD8, 0xFF, 0xE0} // JPEG magic bytes + os.WriteFile(jpegPath, jpegHeader, 0o644) + ref, _ := store.Store(jpegPath, media.MediaMeta{ContentType: "image/jpeg"}, "test") + + messages := []providers.Message{ + {Role: "user", Content: "hi", Media: []string{ref}}, + } + result := resolveMediaRefs(messages, store, config.DefaultMaxMediaSize) + + if len(result[0].Media) != 1 { + t.Fatalf("expected 1 media, got %d", len(result[0].Media)) + } + if !strings.HasPrefix(result[0].Media[0], "data:image/jpeg;base64,") { + t.Fatalf("expected jpeg prefix, got %q", result[0].Media[0][:30]) + } +} + From 03f7ae494f348ada4a7327486010a4ea88d05d9d Mon Sep 17 00:00:00 2001 From: shikihane Date: Tue, 3 Mar 2026 14:38:39 +0800 Subject: [PATCH 5/7] feat(openai_compat): implement serializeMessages with multipart media support Co-Authored-By: Claude Opus 4.6 --- pkg/providers/openai_compat/provider.go | 62 ++++++++++--- pkg/providers/openai_compat/provider_test.go | 98 ++++++++++++++++++++ 2 files changed, 147 insertions(+), 13 deletions(-) diff --git a/pkg/providers/openai_compat/provider.go b/pkg/providers/openai_compat/provider.go index 3a18b8b16c..ff9109e969 100644 --- a/pkg/providers/openai_compat/provider.go +++ b/pkg/providers/openai_compat/provider.go @@ -116,7 +116,7 @@ func (p *Provider) Chat( requestBody := map[string]any{ "model": model, - "messages": stripSystemParts(messages), + "messages": serializeMessages(messages), } if len(tools) > 0 { @@ -296,19 +296,55 @@ type openaiMessage struct { ToolCallID string `json:"tool_call_id,omitempty"` } -// stripSystemParts converts []Message to []openaiMessage, dropping the -// SystemParts field so it doesn't leak into the JSON payload sent to -// OpenAI-compatible APIs (some strict endpoints reject unknown fields). -func stripSystemParts(messages []Message) []openaiMessage { - out := make([]openaiMessage, len(messages)) - for i, m := range messages { - out[i] = openaiMessage{ - Role: m.Role, - Content: m.Content, - ReasoningContent: m.ReasoningContent, - ToolCalls: m.ToolCalls, - ToolCallID: m.ToolCallID, +// serializeMessages converts internal Message structs to the OpenAI wire format. +// - Strips SystemParts (unknown to third-party endpoints) +// - Converts messages with Media to multipart content format (text + image_url parts) +// - Preserves ToolCallID, ToolCalls, and ReasoningContent for all messages +func serializeMessages(messages []Message) []any { + out := make([]any, 0, len(messages)) + for _, m := range messages { + if len(m.Media) == 0 { + out = append(out, openaiMessage{ + Role: m.Role, + Content: m.Content, + ReasoningContent: m.ReasoningContent, + ToolCalls: m.ToolCalls, + ToolCallID: m.ToolCallID, + }) + continue } + + // Multipart content format for messages with media + parts := make([]map[string]any, 0, 1+len(m.Media)) + if m.Content != "" { + parts = append(parts, map[string]any{ + "type": "text", + "text": m.Content, + }) + } + for _, mediaURL := range m.Media { + parts = append(parts, map[string]any{ + "type": "image_url", + "image_url": map[string]any{ + "url": mediaURL, + }, + }) + } + + msg := map[string]any{ + "role": m.Role, + "content": parts, + } + if m.ToolCallID != "" { + msg["tool_call_id"] = m.ToolCallID + } + if len(m.ToolCalls) > 0 { + msg["tool_calls"] = m.ToolCalls + } + if m.ReasoningContent != "" { + msg["reasoning_content"] = m.ReasoningContent + } + out = append(out, msg) } return out } diff --git a/pkg/providers/openai_compat/provider_test.go b/pkg/providers/openai_compat/provider_test.go index 53b9e75ee3..9d3b91a1af 100644 --- a/pkg/providers/openai_compat/provider_test.go +++ b/pkg/providers/openai_compat/provider_test.go @@ -5,8 +5,11 @@ import ( "net/http" "net/http/httptest" "net/url" + "strings" "testing" "time" + + "github.com/sipeed/picoclaw/pkg/providers/protocoltypes" ) func TestProviderChat_UsesMaxCompletionTokensForGLM(t *testing.T) { @@ -416,3 +419,98 @@ func TestProvider_FunctionalOptionRequestTimeoutNonPositive(t *testing.T) { t.Fatalf("http timeout = %v, want %v", p.httpClient.Timeout, defaultRequestTimeout) } } + +func TestSerializeMessages_PlainText(t *testing.T) { + messages := []protocoltypes.Message{ + {Role: "user", Content: "hello"}, + {Role: "assistant", Content: "hi", ReasoningContent: "thinking..."}, + } + result := serializeMessages(messages) + + data, err := json.Marshal(result) + if err != nil { + t.Fatal(err) + } + + var msgs []map[string]any + json.Unmarshal(data, &msgs) + + if msgs[0]["content"] != "hello" { + t.Fatalf("expected plain string content, got %v", msgs[0]["content"]) + } + if msgs[1]["reasoning_content"] != "thinking..." { + t.Fatalf("reasoning_content not preserved, got %v", msgs[1]["reasoning_content"]) + } +} + +func TestSerializeMessages_WithMedia(t *testing.T) { + messages := []protocoltypes.Message{ + {Role: "user", Content: "describe this", Media: []string{"data:image/png;base64,abc123"}}, + } + result := serializeMessages(messages) + + data, _ := json.Marshal(result) + var msgs []map[string]any + json.Unmarshal(data, &msgs) + + content, ok := msgs[0]["content"].([]any) + if !ok { + t.Fatalf("expected array content for media message, got %T", msgs[0]["content"]) + } + if len(content) != 2 { + t.Fatalf("expected 2 content parts, got %d", len(content)) + } + + textPart := content[0].(map[string]any) + if textPart["type"] != "text" || textPart["text"] != "describe this" { + t.Fatalf("text part mismatch: %v", textPart) + } + + imgPart := content[1].(map[string]any) + if imgPart["type"] != "image_url" { + t.Fatalf("expected image_url type, got %v", imgPart["type"]) + } + imgURL := imgPart["image_url"].(map[string]any) + if imgURL["url"] != "data:image/png;base64,abc123" { + t.Fatalf("image url mismatch: %v", imgURL["url"]) + } +} + +func TestSerializeMessages_MediaWithToolCallID(t *testing.T) { + messages := []protocoltypes.Message{ + {Role: "tool", Content: "image result", Media: []string{"data:image/png;base64,xyz"}, ToolCallID: "call_1"}, + } + result := serializeMessages(messages) + + data, _ := json.Marshal(result) + var msgs []map[string]any + json.Unmarshal(data, &msgs) + + if msgs[0]["tool_call_id"] != "call_1" { + t.Fatalf("tool_call_id not preserved with media, got %v", msgs[0]["tool_call_id"]) + } + // Content should be multipart array + if _, ok := msgs[0]["content"].([]any); !ok { + t.Fatalf("expected array content, got %T", msgs[0]["content"]) + } +} + +func TestSerializeMessages_StripsSystemParts(t *testing.T) { + messages := []protocoltypes.Message{ + { + Role: "system", + Content: "you are helpful", + SystemParts: []protocoltypes.ContentBlock{ + {Type: "text", Text: "you are helpful"}, + }, + }, + } + result := serializeMessages(messages) + + data, _ := json.Marshal(result) + raw := string(data) + if strings.Contains(raw, "system_parts") { + t.Fatal("system_parts should not appear in serialized output") + } +} + From 43227411eedde9c35aa5de37d3f12b0745bf2cf9 Mon Sep 17 00:00:00 2001 From: shikihane Date: Tue, 3 Mar 2026 14:52:57 +0800 Subject: [PATCH 6/7] feat(agent): wire media refs through agent pipeline to LLM provider Co-Authored-By: Claude Opus 4.6 --- pkg/agent/context.go | 8 ++++++-- pkg/agent/loop.go | 24 +++++++++++++++--------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/pkg/agent/context.go b/pkg/agent/context.go index 6fccbaf53f..8a35d4457b 100644 --- a/pkg/agent/context.go +++ b/pkg/agent/context.go @@ -466,10 +466,14 @@ func (cb *ContextBuilder) BuildMessages( // Add current user message if strings.TrimSpace(currentMessage) != "" { - messages = append(messages, providers.Message{ + msg := providers.Message{ Role: "user", Content: currentMessage, - }) + } + if len(media) > 0 { + msg.Media = media + } + messages = append(messages, msg) } return messages diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index ac9b449a21..b803187b13 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -47,14 +47,15 @@ type AgentLoop struct { // processOptions configures how a message is processed type processOptions struct { - SessionKey string // Session identifier for history/context - Channel string // Target channel for tool execution - ChatID string // Target chat ID for tool execution - UserMessage string // User message content (may include prefix) - DefaultResponse string // Response when LLM returns empty - EnableSummary bool // Whether to trigger summarization - SendResponse bool // Whether to send response via bus - NoHistory bool // If true, don't load session history (for heartbeat) + SessionKey string // Session identifier for history/context + Channel string // Target channel for tool execution + ChatID string // Target chat ID for tool execution + UserMessage string // User message content (may include prefix) + Media []string // media:// refs from inbound message + DefaultResponse string // Response when LLM returns empty + EnableSummary bool // Whether to trigger summarization + SendResponse bool // Whether to send response via bus + NoHistory bool // If true, don't load session history (for heartbeat) } const defaultResponse = "I've completed processing but have no response to give. Increase `max_tool_iterations` in config.json." @@ -497,6 +498,7 @@ func (al *AgentLoop) processMessage(ctx context.Context, msg bus.InboundMessage) Channel: msg.Channel, ChatID: msg.ChatID, UserMessage: msg.Content, + Media: msg.Media, DefaultResponse: defaultResponse, EnableSummary: true, SendResponse: false, @@ -603,11 +605,15 @@ func (al *AgentLoop) runAgentLoop( history, summary, opts.UserMessage, - nil, + opts.Media, opts.Channel, opts.ChatID, ) + // Resolve media:// refs to base64 data URLs (streaming) + maxMediaSize := al.cfg.Agents.Defaults.GetMaxMediaSize() + messages = resolveMediaRefs(messages, al.mediaStore, maxMediaSize) + // 3. Save user message to session agent.Sessions.AddMessage(opts.SessionKey, "user", opts.UserMessage) From 6ccb68c63e8daccec339f6ec02b7b87ecc8334b1 Mon Sep 17 00:00:00 2001 From: shikihane Date: Tue, 3 Mar 2026 17:04:13 +0800 Subject: [PATCH 7/7] fix: resolve linter issues (gci import grouping, gofumpt, govet shadow) - Separate third-party imports from local module imports (gci) - Fix byte slice literal formatting (gofumpt) - Rename shadowed err variable to ftErr (govet) - Remove trailing blank lines in test files Co-Authored-By: Claude Opus 4.6 --- pkg/agent/loop_media.go | 5 +++-- pkg/agent/loop_test.go | 7 ++++--- pkg/providers/openai_compat/provider_test.go | 1 - 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pkg/agent/loop_media.go b/pkg/agent/loop_media.go index 813feef696..82547a0087 100644 --- a/pkg/agent/loop_media.go +++ b/pkg/agent/loop_media.go @@ -14,6 +14,7 @@ import ( "strings" "github.com/h2non/filetype" + "github.com/sipeed/picoclaw/pkg/logger" "github.com/sipeed/picoclaw/pkg/media" "github.com/sipeed/picoclaw/pkg/providers" @@ -72,8 +73,8 @@ func resolveMediaRefs(messages []providers.Message, store media.MediaStore, maxS // Determine MIME type: prefer metadata, fallback to magic-bytes detection mime := meta.ContentType if mime == "" { - kind, err := filetype.MatchFile(localPath) - if err != nil || kind == filetype.Unknown { + kind, ftErr := filetype.MatchFile(localPath) + if ftErr != nil || kind == filetype.Unknown { logger.WarnCF("agent", "Unknown media type, skipping", map[string]any{ "path": localPath, }) diff --git a/pkg/agent/loop_test.go b/pkg/agent/loop_test.go index 4076c6e7c4..023286f02b 100644 --- a/pkg/agent/loop_test.go +++ b/pkg/agent/loop_test.go @@ -906,10 +906,12 @@ func TestResolveMediaRefs_DoesNotMutateOriginal(t *testing.T) { store := media.NewFileMediaStore() dir := t.TempDir() pngPath := filepath.Join(dir, "test.png") - pngHeader := []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, + pngHeader := []byte{ + 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x08, 0x02, - 0x00, 0x00, 0x00, 0x90, 0x77, 0x53, 0xDE} + 0x00, 0x00, 0x00, 0x90, 0x77, 0x53, 0xDE, + } os.WriteFile(pngPath, pngHeader, 0o644) ref, _ := store.Store(pngPath, media.MediaMeta{}, "test") @@ -947,4 +949,3 @@ func TestResolveMediaRefs_UsesMetaContentType(t *testing.T) { t.Fatalf("expected jpeg prefix, got %q", result[0].Media[0][:30]) } } - diff --git a/pkg/providers/openai_compat/provider_test.go b/pkg/providers/openai_compat/provider_test.go index 9d3b91a1af..174bcf00d8 100644 --- a/pkg/providers/openai_compat/provider_test.go +++ b/pkg/providers/openai_compat/provider_test.go @@ -513,4 +513,3 @@ func TestSerializeMessages_StripsSystemParts(t *testing.T) { t.Fatal("system_parts should not appear in serialized output") } } -