Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pkg/agent/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,11 @@ func (cb *ContextBuilder) BuildMessages(history []providers.Message, summary str

messages = append(messages, history...)

if strings.TrimSpace(currentMessage) != "" {
if strings.TrimSpace(currentMessage) != "" || len(media) > 0 {
messages = append(messages, providers.Message{
Role: "user",
Content: currentMessage,
Media: media,
})
}

Expand Down
20 changes: 11 additions & 9 deletions pkg/agent/loop.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,15 @@ type AgentLoop struct {

// processOptions configures how a message is processed
type processOptions struct {
SessionKey string // Session identifier for history/context
Channel string // Target channel for tool execution
ChatID string // Target chat ID for tool execution
UserMessage string // User message content (may include prefix)
DefaultResponse string // Response when LLM returns empty
EnableSummary bool // Whether to trigger summarization
SendResponse bool // Whether to send response via bus
NoHistory bool // If true, don't load session history (for heartbeat)
SessionKey string // Session identifier for history/context
Channel string // Target channel for tool execution
ChatID string // Target chat ID for tool execution
UserMessage string // User message content (may include prefix)
Media []string // Media URLs attached to the user message
DefaultResponse string // Response when LLM returns empty
EnableSummary bool // Whether to trigger summarization
SendResponse bool // Whether to send response via bus
NoHistory bool // If true, don't load session history (for heartbeat)
}

func NewAgentLoop(cfg *config.Config, msgBus *bus.MessageBus, provider providers.LLMProvider) *AgentLoop {
Expand Down Expand Up @@ -313,6 +314,7 @@ func (al *AgentLoop) processMessage(ctx context.Context, msg bus.InboundMessage)
Channel: msg.Channel,
ChatID: msg.ChatID,
UserMessage: msg.Content,
Media: msg.Media,
DefaultResponse: "I've completed processing but have no response to give.",
EnableSummary: true,
SendResponse: false,
Expand Down Expand Up @@ -402,7 +404,7 @@ func (al *AgentLoop) runAgentLoop(ctx context.Context, agent *AgentInstance, opt
history,
summary,
opts.UserMessage,
nil,
opts.Media,
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Media field contains URLs that are passed directly to external LLM APIs without validation. Consider adding validation to ensure the URLs come from trusted sources (e.g., the Discord CDN for Discord messages) and do not expose internal network resources. Although the URLs originate from InboundMessage.Media, which likely comes from Discord, explicit validation would prevent potential SSRF vulnerabilities if the source changes or if there are bugs in upstream URL extraction.

Copilot uses AI. Check for mistakes.
opts.Channel,
opts.ChatID,
)
Expand Down
43 changes: 42 additions & 1 deletion pkg/providers/openai_compat/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func (p *Provider) Chat(ctx context.Context, messages []Message, tools []ToolDef

requestBody := map[string]interface{}{
"model": model,
"messages": messages,
"messages": serializeMessages(messages),
}

if len(tools) > 0 {
Expand Down Expand Up @@ -135,6 +135,47 @@ func (p *Provider) Chat(ctx context.Context, messages []Message, tools []ToolDef
return parseResponse(body)
}

func serializeMessages(messages []Message) []map[string]interface{} {
result := make([]map[string]interface{}, 0, len(messages))
for _, m := range messages {
if len(m.Media) == 0 {
msg := map[string]interface{}{
"role": m.Role,
"content": m.Content,
}
if m.ToolCallID != "" {
msg["tool_call_id"] = m.ToolCallID
}
if len(m.ToolCalls) > 0 {
msg["tool_calls"] = m.ToolCalls
}
result = append(result, msg)
continue
}

parts := make([]map[string]interface{}, 0, 1+len(m.Media))
if m.Content != "" {
parts = append(parts, map[string]interface{}{
"type": "text",
"text": m.Content,
})
}
for _, mediaURL := range m.Media {
parts = append(parts, map[string]interface{}{
"type": "image_url",
"image_url": map[string]interface{}{
"url": mediaURL,
},
})
}
result = append(result, map[string]interface{}{
"role": m.Role,
"content": parts,
})
Comment on lines +171 to +174
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When a message contains Media, the tool_calls and tool_call_id fields are not serialized. If a message has both media attachments and tool calls (or is a tool response with media), the tool-call information is silently dropped from the request. Consider handling these fields even when Media is present, or document that this combination is intentionally unsupported.

Suggested change
result = append(result, map[string]interface{}{
"role": m.Role,
"content": parts,
})
msg := map[string]interface{}{
"role": m.Role,
"content": parts,
}
if m.ToolCallID != "" {
msg["tool_call_id"] = m.ToolCallID
}
if len(m.ToolCalls) > 0 {
msg["tool_calls"] = m.ToolCalls
}
result = append(result, msg)

Copilot uses AI. Check for mistakes.
}
return result
}
Comment on lines +138 to +177
Copy link

Copilot AI Feb 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The serializeMessages function lacks test coverage. Given the comprehensive test suite for other provider functionality (e.g., TestProviderChat_ParsesToolCalls, TestProviderChat_StripsMoonshotPrefixAndNormalizesKimiTemperature), consider adding tests for message serialization with media attachments to ensure correct OpenAI API format compliance and prevent regressions. Test cases should cover: messages with media only, messages with both text and media, and messages without media (backward compatibility).

Copilot uses AI. Check for mistakes.

func parseResponse(body []byte) (*LLMResponse, error) {
var apiResponse struct {
Choices []struct {
Expand Down
1 change: 1 addition & 0 deletions pkg/providers/protocoltypes/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ type UsageInfo struct {
// Message is one entry in a chat conversation: a role, its text content,
// and optional media and tool-call data. The optional fields are tagged
// omitempty, so they are left out of the JSON encoding when empty.
type Message struct {
Role string `json:"role"` // Speaker of the message (e.g. "user")
Content string `json:"content"` // Text body of the message; always encoded, even when empty
Media []string `json:"media,omitempty"` // URLs of images or other media attachments
ToolCalls []ToolCall `json:"tool_calls,omitempty"` // Tool invocations attached to this message, if any
ToolCallID string `json:"tool_call_id,omitempty"` // NOTE(review): presumably the ID of the tool call this message answers — confirm
}
Expand Down
Loading