Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion pkg/channels/base_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,3 @@ func TestBaseChannelIsAllowed(t *testing.T) {
})
}
}

146 changes: 144 additions & 2 deletions pkg/channels/discord.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"
"os"
"strings"
"time"

"github.com/bwmarrin/discordgo"
Expand Down Expand Up @@ -100,15 +101,156 @@ func (c *DiscordChannel) Send(ctx context.Context, msg bus.OutboundMessage) erro
return fmt.Errorf("channel ID is empty")
}

message := msg.Content
runes := []rune(msg.Content)
if len(runes) == 0 {
return nil
}

chunks := splitMessage(msg.Content, 1500) // Discord has a limit of 2000 characters per message, leave 500 for natural split e.g. code blocks

for _, chunk := range chunks {
if err := c.sendChunk(ctx, channelID, chunk); err != nil {
return err
}
}

return nil
}

// splitMessage splits long messages into chunks, preserving code block integrity
// Uses natural boundaries (newlines, spaces) and extends messages slightly to avoid breaking code blocks
func splitMessage(content string, limit int) []string {
var messages []string

for len(content) > 0 {
if len(content) <= limit {
messages = append(messages, content)
break
}

msgEnd := limit

// Find natural split point within the limit
msgEnd = findLastNewline(content[:limit], 200)
if msgEnd <= 0 {
msgEnd = findLastSpace(content[:limit], 100)
}
if msgEnd <= 0 {
msgEnd = limit
}

// Check if this would end with an incomplete code block
candidate := content[:msgEnd]
unclosedIdx := findLastUnclosedCodeBlock(candidate)

if unclosedIdx >= 0 {
// Message would end with incomplete code block
// Try to extend to include the closing ``` (with some buffer)
extendedLimit := limit + 500 // Allow 500 char buffer for code blocks
if len(content) > extendedLimit {
closingIdx := findNextClosingCodeBlock(content, msgEnd)
if closingIdx > 0 && closingIdx <= extendedLimit {
// Extend to include the closing ```
msgEnd = closingIdx
} else {
// Can't find closing, split before the code block
msgEnd = findLastNewline(content[:unclosedIdx], 200)
if msgEnd <= 0 {
msgEnd = findLastSpace(content[:unclosedIdx], 100)
}
if msgEnd <= 0 {
msgEnd = unclosedIdx
}
}
} else {
// Remaining content fits within extended limit
msgEnd = len(content)
}
}

if msgEnd <= 0 {
msgEnd = limit
}

messages = append(messages, content[:msgEnd])
content = strings.TrimSpace(content[msgEnd:])
}

return messages
}

// findLastUnclosedCodeBlock finds the last opening ``` that doesn't have a closing ```
// Returns the position of the opening ``` or -1 if all code blocks are complete
func findLastUnclosedCodeBlock(text string) int {
count := 0
lastOpenIdx := -1

for i := 0; i < len(text); i++ {
if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' {
if count == 0 {
lastOpenIdx = i
}
count++
i += 2
}
}

// If odd number of ``` markers, last one is unclosed
if count%2 == 1 {
return lastOpenIdx
}
return -1
}

// findNextClosingCodeBlock finds the next closing ``` starting from a position
// Returns the position after the closing ``` or -1 if not found
func findNextClosingCodeBlock(text string, startIdx int) int {
for i := startIdx; i < len(text); i++ {
if i+2 < len(text) && text[i] == '`' && text[i+1] == '`' && text[i+2] == '`' {
return i + 3
}
}
return -1
}

// findLastNewline finds the last newline character within the last N characters
// Returns the position of the newline or -1 if not found
func findLastNewline(s string, searchWindow int) int {
searchStart := len(s) - searchWindow
if searchStart < 0 {
searchStart = 0
}
for i := len(s) - 1; i >= searchStart; i-- {
if s[i] == '\n' {
return i
}
}
return -1
}

// findLastSpace finds the last space character within the last N characters
// Returns the position of the space or -1 if not found
func findLastSpace(s string, searchWindow int) int {
searchStart := len(s) - searchWindow
if searchStart < 0 {
searchStart = 0
}
for i := len(s) - 1; i >= searchStart; i-- {
if s[i] == ' ' || s[i] == '\t' {
return i
}
}
return -1
}

func (c *DiscordChannel) sendChunk(ctx context.Context, channelID, content string) error {
// 使用传入的 ctx 进行超时控制
sendCtx, cancel := context.WithTimeout(ctx, sendTimeout)
defer cancel()

done := make(chan error, 1)
go func() {
_, err := c.session.ChannelMessageSend(channelID, message)
_, err := c.session.ChannelMessageSend(channelID, content)
done <- err
}()

Expand Down
8 changes: 4 additions & 4 deletions pkg/tools/web.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ func (p *DuckDuckGoSearchProvider) Search(ctx context.Context, query string, cou
func (p *DuckDuckGoSearchProvider) extractResults(html string, count int, query string) (string, error) {
// Simple regex based extraction for DDG HTML
// Strategy: Find all result containers or key anchors directly

// Try finding the result links directly first, as they are the most critical
// Pattern: <a class="result__a" href="...">Title</a>
// The previous regex was a bit strict. Let's make it more flexible for attributes order/content
Expand All @@ -133,14 +133,14 @@ func (p *DuckDuckGoSearchProvider) extractResults(html string, count int, query
// But simple global search for snippets might mismatch order.
// Since we only have the raw HTML string, let's just extract snippets globally and assume order matches (risky but simple for regex)
// Or better: Let's assume the snippet follows the link in the HTML

// A better regex approach: iterate through text and find matches in order
// But for now, let's grab all snippets too
reSnippet := regexp.MustCompile(`<a class="result__snippet[^"]*".*?>([\s\S]*?)</a>`)
snippetMatches := reSnippet.FindAllStringSubmatch(html, count+5)

maxItems := min(len(matches), count)

for i := 0; i < maxItems; i++ {
urlStr := matches[i][1]
title := stripTags(matches[i][2])
Expand All @@ -157,7 +157,7 @@ func (p *DuckDuckGoSearchProvider) extractResults(html string, count int, query
}

lines = append(lines, fmt.Sprintf("%d. %s\n %s", i+1, title, urlStr))

// Attempt to attach snippet if available and index aligns
if i < len(snippetMatches) {
snippet := stripTags(snippetMatches[i][1])
Expand Down