diff --git a/Dockerfile b/Dockerfile index 0360cfda62..9be9a68e14 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,16 +3,19 @@ # ============================================================ FROM golang:1.26.0-alpine AS builder -RUN apk add --no-cache git make +# Install build dependencies +RUN apk add --no-cache git make gcc musl-dev WORKDIR /src -# Cache dependencies +# Cache dependencies for faster subsequent builds COPY go.mod go.sum ./ RUN go mod download -# Copy source and build +# Copy your local source code (where you'll add the Thought Signature fix) COPY . . + +# Compile the binary RUN make build # ============================================================ @@ -20,13 +23,14 @@ RUN make build # ============================================================ FROM alpine:3.23 +# Install runtime essentials RUN apk add --no-cache ca-certificates tzdata curl # Health check HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ CMD wget -q --spider http://localhost:18790/health || exit 1 -# Copy binary +# Copy the compiled binary from the builder stage COPY --from=builder /src/build/picoclaw /usr/local/bin/picoclaw # Create non-root user and group @@ -39,5 +43,6 @@ USER picoclaw # Run onboard to create initial directories and config RUN /usr/local/bin/picoclaw onboard +# Set the binary as the entrypoint ENTRYPOINT ["picoclaw"] -CMD ["gateway"] +CMD ["gateway"] \ No newline at end of file diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index 8c6c58c96b..75d0b1e12d 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -624,13 +624,19 @@ func (al *AgentLoop) runLLMIteration(ctx context.Context, messages []providers.M } for _, tc := range response.ToolCalls { argumentsJSON, _ := json.Marshal(tc.Arguments) + // Copy ExtraContent to ensure thought_signature is persisted + extraContent := tc.ExtraContent + assistantMsg.ToolCalls = append(assistantMsg.ToolCalls, providers.ToolCall{ ID: tc.ID, Type: "function", Function: &providers.FunctionCall{ - Name: tc.Name, - Arguments: string(argumentsJSON), + Name: tc.Name, + Arguments: string(argumentsJSON), }, + ExtraContent: extraContent, + // We also set internal ThoughtSignature, but ExtraContent is what matters for serialization + ThoughtSignature: tc.ThoughtSignature, }) } messages = append(messages, assistantMsg) @@ -737,7 +743,7 @@ func (al *AgentLoop) maybeSummarize(sessionKey, channel, chatID string) { al.bus.PublishOutbound(bus.OutboundMessage{ Channel: channel, ChatID: chatID, - Content: "⚠️ Memory threshold reached. Optimizing conversation history...", + Content: "🔄 Context optimized. (Memory threshold reached)", }) } al.summarizeSession(sessionKey) diff --git a/pkg/channels/discord.go b/pkg/channels/discord.go index 00aa8ab4de..b290c75e32 100644 --- a/pkg/channels/discord.go +++ b/pkg/channels/discord.go @@ -4,7 +4,7 @@ import ( "context" "fmt" "os" - "strings" + "sync" "time" "github.com/bwmarrin/discordgo" @@ -26,6 +26,7 @@ type DiscordChannel struct { config config.DiscordConfig transcriber *voice.GroqTranscriber ctx context.Context + typingTasks sync.Map } func NewDiscordChannel(cfg config.DiscordConfig, bus *bus.MessageBus) (*DiscordChannel, error) { @@ -101,6 +102,18 @@ func (c *DiscordChannel) Send(ctx context.Context, msg bus.OutboundMessage) erro return fmt.Errorf("channel ID is empty") } + // Stop typing indicator heartbeat for this channel + if stop, ok := c.typingTasks.Load(channelID); ok { + if stopChan, ok := stop.(chan struct{}); ok { + select { + case <-stopChan: + default: + close(stopChan) + } + } + c.typingTasks.Delete(channelID) + } + runes := []rune(msg.Content) if len(runes) == 0 { return nil @@ -120,97 +133,102 @@ func (c *DiscordChannel) Send(ctx context.Context, msg bus.OutboundMessage) erro // splitMessage splits long messages into chunks, preserving code block integrity // Uses natural boundaries (newlines, spaces) and extends messages slightly to avoid breaking code blocks func splitMessage(content string, limit int) []string { - var messages []string + if limit > 1900 { + limit = 1900 + } + runes := []rune(content) + if len(runes) <= limit { + return []string{content} + } - for len(content) > 0 { - if len(content) <= limit { - messages = append(messages, content) + var chunks []string + for len(runes) > 0 { + if len(runes) <= limit { + chunks = append(chunks, string(runes)) break } - msgEnd := limit + splitAt := limit - // Find natural split point within the limit - msgEnd = findLastNewline(content[:limit], 200) - if msgEnd <= 0 { - msgEnd = findLastSpace(content[:limit], 100) + // Look for natural split points (newlines) in a window + window := 300 + if splitAt < window { + window = splitAt } - if msgEnd <= 0 { - msgEnd = limit + + foundNatural := false + for i := splitAt - 1; i >= splitAt-window; i-- { + if runes[i] == '\n' { + splitAt = i + 1 + foundNatural = true + break + } } - // Check if this would end with an incomplete code block - candidate := content[:msgEnd] - unclosedIdx := findLastUnclosedCodeBlock(candidate) - - if unclosedIdx >= 0 { - // Message would end with incomplete code block - // Try to extend to include the closing ``` (with some buffer) - extendedLimit := limit + 500 // Allow 500 char buffer for code blocks - if len(content) > extendedLimit { - closingIdx := findNextClosingCodeBlock(content, msgEnd) - if closingIdx > 0 && closingIdx <= extendedLimit { - // Extend to include the closing ``` - msgEnd = closingIdx - } else { - // Can't find closing, split before the code block - msgEnd = findLastNewline(content[:unclosedIdx], 200) - if msgEnd <= 0 { - msgEnd = findLastSpace(content[:unclosedIdx], 100) - } - if msgEnd <= 0 { - msgEnd = unclosedIdx + if !foundNatural { + for i := splitAt - 1; i >= splitAt-window/2; i-- { + if runes[i] == ' ' || runes[i] == '\t' { + splitAt = i + 1 + foundNatural = true + break + } + } + } + + // Check for unclosed code blocks + chunkCandidate := runes[:splitAt] + if isInsideCodeBlock(chunkCandidate) { + // Try to find the closing code block within extended limit + extendedLimit := limit + 300 + if extendedLimit > 2000 { + extendedLimit = 2000 + } + + foundClosing := false + for i := splitAt; i < len(runes)-2 && i < extendedLimit-3; i++ { + if runes[i] == '`' && runes[i+1] == '`' && runes[i+2] == '`' { + splitAt = i + 3 + foundClosing = true + break + } + } + + if !foundClosing { + // Can't find closing within reasonable limit, split before the block + for i := splitAt - 1; i >= 0; i-- { + if i+2 < len(runes) && runes[i] == '`' && runes[i+1] == '`' && runes[i+2] == '`' { + if i > 0 { + splitAt = i + } + break } } - } else { - // Remaining content fits within extended limit - msgEnd = len(content) } } - if msgEnd <= 0 { - msgEnd = limit + if splitAt <= 0 { + splitAt = limit } - messages = append(messages, content[:msgEnd]) - content = strings.TrimSpace(content[msgEnd:]) + chunks = append(chunks, string(runes[:splitAt])) + runes = runes[splitAt:] } - return messages + return chunks } -// findLastUnclosedCodeBlock finds the last opening ``` that doesn't have a closing ``` -// Returns the position of the opening ``` or -1 if all code blocks are complete -func findLastUnclosedCodeBlock(text string) int { +func isInsideCodeBlock(runes []rune) bool { count := 0 - lastOpenIdx := -1 - - for i := 0; i < len(text); i++ { - if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' { - if count == 0 { - lastOpenIdx = i - } + for i := 0; i < len(runes)-2; i++ { + if runes[i] == '`' && runes[i+1] == '`' && runes[i+2] == '`' { count++ i += 2 } } - - // If odd number of ``` markers, last one is unclosed - if count%2 == 1 { - return lastOpenIdx - } - return -1 + return count%2 != 0 } -// findNextClosingCodeBlock finds the next closing ``` starting from a position -// Returns the position after the closing ``` or -1 if not found -func findNextClosingCodeBlock(text string, startIdx int) int { - for i := startIdx; i < len(text); i++ { - if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' { - return i + 3 - } - } - return -1 +func _unused_marker_() { } // findLastNewline finds the last newline character within the last N characters @@ -282,11 +300,33 @@ func (c *DiscordChannel) handleMessage(s *discordgo.Session, m *discordgo.Messag return } - if err := c.session.ChannelTyping(m.ChannelID); err != nil { - logger.ErrorCF("discord", "Failed to send typing indicator", map[string]any{ - "error": err.Error(), - }) + // Start typing indicator heartbeat + stopTyping := make(chan struct{}) + if old, ok := c.typingTasks.Load(m.ChannelID); ok { + if oldChan, ok := old.(chan struct{}); ok { + select { + case <-oldChan: + default: + close(oldChan) + } + } } + c.typingTasks.Store(m.ChannelID, stopTyping) + + go func(chID string, stop chan struct{}) { + // Send initial typing + _ = c.session.ChannelTyping(chID) + ticker := time.NewTicker(7 * time.Second) // Discord typing lasts ~10s + defer ticker.Stop() + for { + select { + case <-stop: + return + case <-ticker.C: + _ = c.session.ChannelTyping(chID) + } + } + }(m.ChannelID, stopTyping) // 检查白名单,避免为被拒绝的用户下载附件和转录 if !c.IsAllowed(m.Author.ID) { diff --git a/pkg/providers/http_provider.go b/pkg/providers/http_provider.go index 946aa29d22..0a410431f2 100644 --- a/pkg/providers/http_provider.go +++ b/pkg/providers/http_provider.go @@ -61,6 +61,8 @@ func (p *HTTPProvider) Chat(ctx context.Context, messages []Message, tools []Too } } + // Pre-process messages loop removed - relying on ExtraContent persistence in Agent Loop. + requestBody := map[string]interface{}{ "model": model, "messages": messages, @@ -135,6 +137,11 @@ func (p *HTTPProvider) parseResponse(body []byte) (*LLMResponse, error) { Name string `json:"name"` Arguments string `json:"arguments"` } `json:"function"` + ExtraContent *struct { + Google *struct { + ThoughtSignature string `json:"thought_signature"` + } `json:"google"` + } `json:"extra_content"` } `json:"tool_calls"` } `json:"message"` FinishReason string `json:"finish_reason"` @@ -160,7 +167,12 @@ func (p *HTTPProvider) parseResponse(body []byte) (*LLMResponse, error) { arguments := make(map[string]interface{}) name := "" - // Handle OpenAI format with nested function object + // Extract thought_signature from Gemini/Google-specific extra content + thoughtSignature := "" + if tc.ExtraContent != nil && tc.ExtraContent.Google != nil { + thoughtSignature = tc.ExtraContent.Google.ThoughtSignature + } + if tc.Type == "function" && tc.Function != nil { name = tc.Function.Name if tc.Function.Arguments != "" { @@ -178,11 +190,23 @@ func (p *HTTPProvider) parseResponse(body []byte) (*LLMResponse, error) { } } - toolCalls = append(toolCalls, ToolCall{ - ID: tc.ID, - Name: name, - Arguments: arguments, - }) + // Correctly map extracted ExtraContent to ToolCall struct + toolCall := ToolCall{ + ID: tc.ID, + Name: name, + Arguments: arguments, + ThoughtSignature: thoughtSignature, // Populating internal field for convenience + } + + if thoughtSignature != "" { + toolCall.ExtraContent = &ExtraContent{ + Google: &GoogleExtra{ + ThoughtSignature: thoughtSignature, + }, + } + } + + toolCalls = append(toolCalls, toolCall) } return &LLMResponse{ diff --git a/pkg/providers/types.go b/pkg/providers/types.go index 88b62e9758..6df6c9d2d7 100644 --- a/pkg/providers/types.go +++ b/pkg/providers/types.go @@ -3,16 +3,27 @@ package providers import "context" type ToolCall struct { - ID string `json:"id"` - Type string `json:"type,omitempty"` - Function *FunctionCall `json:"function,omitempty"` - Name string `json:"name,omitempty"` - Arguments map[string]interface{} `json:"arguments,omitempty"` + ID string `json:"id"` + Type string `json:"type,omitempty"` + Function *FunctionCall `json:"function,omitempty"` + Name string `json:"name,omitempty"` + Arguments map[string]interface{} `json:"arguments,omitempty"` + ThoughtSignature string `json:"-"` // Internal use only + ExtraContent *ExtraContent `json:"extra_content,omitempty"` +} + +type ExtraContent struct { + Google *GoogleExtra `json:"google,omitempty"` +} + +type GoogleExtra struct { + ThoughtSignature string `json:"thought_signature,omitempty"` } type FunctionCall struct { - Name string `json:"name"` - Arguments string `json:"arguments"` + Name string `json:"name"` + Arguments string `json:"arguments"` + ThoughtSignature string `json:"-"` // Internal use only } type LLMResponse struct { diff --git a/pkg/tools/shell.go b/pkg/tools/shell.go index bd612d9ae6..eca84b2404 100644 --- a/pkg/tools/shell.go +++ b/pkg/tools/shell.go @@ -255,10 +255,23 @@ func (t *ExecTool) guardCommand(command, cwd string) string { return "" } - pathPattern := regexp.MustCompile(`[A-Za-z]:\\[^\\\"']+|/[^\s\"']+`) - matches := pathPattern.FindAllString(cmd, -1) + // Match absolute paths: Unix (starts with / after space/start/quotes) or Windows (X:\) + // This regex is careful not to match scoped packages like @mastra/core + pathPattern := regexp.MustCompile(`(?:\s|^|["'|&;])(/[^\s\"'|&;]+)|([A-Za-z]:\\[^\\\"'|&;]+)`) + matches := pathPattern.FindAllStringSubmatch(cmd, -1) + + for _, match := range matches { + raw := "" + if match[1] != "" { + raw = match[1] // Unix path + } else if match[2] != "" { + raw = match[2] // Windows path + } + + if raw == "" { + continue + } - for _, raw := range matches { p, err := filepath.Abs(raw) if err != nil { continue