Skip to content
129 changes: 1 addition & 128 deletions pkg/channels/discord.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"context"
"fmt"
"os"
"strings"
"time"

"github.com/bwmarrin/discordgo"
Expand Down Expand Up @@ -106,7 +105,7 @@ func (c *DiscordChannel) Send(ctx context.Context, msg bus.OutboundMessage) erro
return nil
}

chunks := splitMessage(msg.Content, 1500) // Discord has a limit of 2000 characters per message, leave 500 for natural split e.g. code blocks
chunks := utils.SplitMessage(msg.Content, 2000) // Split messages into chunks, Discord length limit: 2000 chars
Comment on lines 103 to +108
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Send converts the full message to []rune only to check emptiness (len(runes) == 0), which allocates and can be expensive for long messages. This can be replaced with a simple msg.Content == "" (or len(msg.Content)==0) check without changing behavior.

Copilot uses AI. Check for mistakes.

Comment thread
huaaudio marked this conversation as resolved.
for _, chunk := range chunks {
if err := c.sendChunk(ctx, channelID, chunk); err != nil {
Expand All @@ -117,132 +116,6 @@ func (c *DiscordChannel) Send(ctx context.Context, msg bus.OutboundMessage) erro
return nil
}

// splitMessage splits content into chunks of roughly limit bytes while trying
// to keep fenced code blocks (```) intact.
//
// The split point is chosen, in order of preference, at the last newline within
// the final 200 bytes of the window, at the last space/tab within the final
// 100 bytes, or at limit itself. When the chosen chunk would end inside an open
// code fence, the chunk is extended to the closing fence if that lies within
// limit+500 bytes; if the whole remainder fits within limit+500 bytes it is
// emitted as a single chunk; otherwise the split is moved to just before the
// opening fence. Chunks may therefore be up to limit+500 bytes long. The
// remainder is whitespace-trimmed before the next chunk is computed.
//
// A split never lands inside a multi-byte UTF-8 sequence. A non-positive limit
// disables splitting and returns content as a single chunk (nil for empty
// content).
func splitMessage(content string, limit int) []string {
	if limit <= 0 {
		// A non-positive window can never make progress; return the input unsplit
		// instead of looping forever or slicing with a negative index.
		if content == "" {
			return nil
		}
		return []string{content}
	}

	var messages []string

	for len(content) > 0 {
		if len(content) <= limit {
			messages = append(messages, content)
			break
		}

		// Find a natural split point within the limit. Index 0 is treated as
		// "not found" because splitting there would produce an empty chunk.
		msgEnd := findLastNewline(content[:limit], 200)
		if msgEnd <= 0 {
			msgEnd = findLastSpace(content[:limit], 100)
		}
		if msgEnd <= 0 {
			msgEnd = limit
		}

		// Check whether the candidate chunk would end inside an unclosed code block.
		if unclosedIdx := findLastUnclosedCodeBlock(content[:msgEnd]); unclosedIdx >= 0 {
			extendedLimit := limit + 500 // Allow a 500-byte buffer for code blocks.
			if len(content) > extendedLimit {
				closingIdx := findNextClosingCodeBlock(content, msgEnd)
				if closingIdx > 0 && closingIdx <= extendedLimit {
					// Extend the chunk to include the closing fence.
					msgEnd = closingIdx
				} else {
					// No closing fence in reach: split before the code block instead.
					msgEnd = findLastNewline(content[:unclosedIdx], 200)
					if msgEnd <= 0 {
						msgEnd = findLastSpace(content[:unclosedIdx], 100)
					}
					if msgEnd <= 0 {
						msgEnd = unclosedIdx
					}
				}
			} else {
				// The remaining content fits within the extended limit.
				msgEnd = len(content)
			}
		}

		if msgEnd <= 0 {
			msgEnd = limit
		}

		// Never cut through a multi-byte UTF-8 sequence: while the byte at the
		// split point is a continuation byte (10xxxxxx), back up to the rune start.
		for msgEnd > 0 && msgEnd < len(content) && content[msgEnd]&0xC0 == 0x80 {
			msgEnd--
		}
		if msgEnd <= 0 {
			// Only reachable for malformed input made of continuation bytes;
			// fall back to the raw limit so the loop still makes progress.
			msgEnd = limit
		}

		messages = append(messages, content[:msgEnd])
		content = strings.TrimSpace(content[msgEnd:])
	}

	return messages
}

// findLastUnclosedCodeBlock returns the byte index of the opening ``` fence that
// is still open at the end of text, or -1 if every fence in text is paired.
//
// Fences are scanned left to right; each one toggles between "outside" and
// "inside" a code block. The index that is reported is that of the fence which
// opened the block still open at the end, not the first fence in text.
func findLastUnclosedCodeBlock(text string) int {
	openIdx := -1 // index of the currently open fence; -1 while outside a block

	for i := 0; i+3 <= len(text); i++ {
		if text[i:i+3] != "```" {
			continue
		}
		if openIdx < 0 {
			// Entering a code block: remember where it opened.
			openIdx = i
		} else {
			// Leaving a code block: nothing is open any more.
			openIdx = -1
		}
		i += 2 // skip the remaining two backticks of this fence
	}

	return openIdx
}

// findNextClosingCodeBlock scans text forward from startIdx for the next ```
// fence. It returns the index just past that fence, or -1 when no fence occurs
// at or after startIdx.
func findNextClosingCodeBlock(text string, startIdx int) int {
	const fence = "```"

	for pos := startIdx; pos+len(fence) <= len(text); pos++ {
		if text[pos:pos+len(fence)] == fence {
			return pos + len(fence)
		}
	}
	return -1
}

// findLastNewline looks for a '\n' in the trailing searchWindow bytes of s,
// scanning from the end backwards. It returns the index of the last newline
// in that window, or -1 if the window contains none.
func findLastNewline(s string, searchWindow int) int {
	floor := 0
	if start := len(s) - searchWindow; start > 0 {
		floor = start
	}

	// pos is one past the byte being inspected, so the scan covers
	// indices len(s)-1 down to floor inclusive.
	for pos := len(s); pos > floor; pos-- {
		if s[pos-1] == '\n' {
			return pos - 1
		}
	}
	return -1
}

// findLastSpace looks for a space or tab in the trailing searchWindow bytes of
// s, scanning from the end backwards. It returns the index of the last such
// byte in that window, or -1 if the window contains none.
func findLastSpace(s string, searchWindow int) int {
	floor := len(s) - searchWindow
	if floor < 0 {
		floor = 0
	}

	for pos := len(s) - 1; pos >= floor; pos-- {
		switch s[pos] {
		case ' ', '\t':
			return pos
		}
	}
	return -1
}

func (c *DiscordChannel) sendChunk(ctx context.Context, channelID, content string) error {
// Use the caller-supplied ctx for timeout control
sendCtx, cancel := context.WithTimeout(ctx, sendTimeout)
Expand Down
173 changes: 173 additions & 0 deletions pkg/utils/message.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
package utils

import (
"strings"
)

const defaultCodeBlockBuffer = 500

// SplitMessage splits long messages into chunks, preserving code block integrity.
// The function prefers to split at maxLen - defaultCodeBlockBuffer to leave room for code blocks,
// but may extend up to maxLen when needed to avoid breaking incomplete code blocks.
// Call SplitMessage with the full text content and the maximum allowed length of a single message;
// it returns a slice of message chunks that each respect maxLen and avoid splitting fenced code blocks.
Comment thread
huaaudio marked this conversation as resolved.
Outdated
func SplitMessage(content string, maxLen int) []string {
var messages []string
codeBlockBuffer := defaultCodeBlockBuffer

for len(content) > 0 {
if len(content) <= maxLen {
messages = append(messages, content)
break
}

// Effective split point: maxLen minus buffer, to leave room for code blocks
effectiveLimit := maxLen - codeBlockBuffer
Comment thread
huaaudio marked this conversation as resolved.
if effectiveLimit < maxLen/2 {
effectiveLimit = maxLen / 2
}

Comment thread
huaaudio marked this conversation as resolved.
// Find natural split point within the effective limit
msgEnd := findLastNewline(content[:effectiveLimit], 200)
if msgEnd <= 0 {
msgEnd = findLastSpace(content[:effectiveLimit], 100)
}
if msgEnd <= 0 {
msgEnd = effectiveLimit
}
Comment on lines +36 to +43
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function computes split points using len(content) and slices strings by byte index (e.g., content[:effectiveLimit] / msgEnd = effectiveLimit). When there’s no newline/space, msgEnd can land in the middle of a multi-byte UTF-8 rune, producing invalid UTF-8 chunks and potentially garbling output. Consider doing all length accounting/splitting in runes (or adjust msgEnd back to a valid rune boundary using utf8 helpers).

Copilot uses AI. Check for mistakes.
Comment on lines +36 to +43
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SplitMessage computes split points using byte indexes (len, content[:effectiveLimit], content[:msgEnd]). When there are no ASCII split characters near the boundary (e.g., long CJK/emoji text), msgEnd can land in the middle of a UTF-8 sequence, producing invalid UTF-8 in a chunk (and JSON encoding will replace invalid bytes). Consider adjusting msgEnd to the nearest prior UTF-8 rune boundary (or operating on []rune) before slicing.

Copilot uses AI. Check for mistakes.

// Check if this would end with an incomplete code block
candidate := content[:msgEnd]
unclosedIdx := findLastUnclosedCodeBlock(candidate)

if unclosedIdx >= 0 {
// Message would end with incomplete code block
// Try to extend up to maxLen to include the closing ```
if len(content) > msgEnd {
closingIdx := findNextClosingCodeBlock(content, msgEnd)
if closingIdx > 0 && closingIdx <= maxLen {
// Extend to include the closing ```
msgEnd = closingIdx
} else {
// Code block is too long to fit in one chunk or missing closing fence.
// Try to split inside by injecting closing and reopening fences.
headerEnd := strings.Index(content[unclosedIdx:], "\n")
if headerEnd == -1 {
headerEnd = unclosedIdx + 3
} else {
headerEnd += unclosedIdx
}
header := strings.TrimSpace(content[unclosedIdx:headerEnd])

Comment on lines +60 to +67
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the opening fence line doesn’t contain a newline (e.g., "```lang" immediately followed by content), the `headerEnd == -1` branch sets `headerEnd = unclosedIdx + 3`, so `header` becomes just "```" and the language tag is lost when reopening. Consider falling back to the next whitespace/end-of-string (or msgEnd) so the language specifier is preserved when present.

Copilot uses AI. Check for mistakes.
// If we have a reasonable amount of content after the header, split inside
if msgEnd > headerEnd+20 {
// Find a better split point closer to maxLen
innerLimit := maxLen - 5 // Leave room for "\n```"
betterEnd := findLastNewline(content[:innerLimit], 200)
if betterEnd > headerEnd {
msgEnd = betterEnd
} else {
msgEnd = innerLimit
}
messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```")
content = strings.TrimSpace(header + "\n" + content[msgEnd:])
continue
Comment on lines +78 to +80
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When splitting inside a code block, content = strings.TrimSpace(header + "\n" + content[msgEnd:]) removes all leading whitespace from the reopened chunk. If the code block content starts with indentation (common in code), this changes the message content rather than just splitting it. Consider avoiding TrimSpace here (e.g., only trimming leading newlines) to preserve code formatting.

Copilot uses AI. Check for mistakes.
}

// Otherwise, try to split before the code block starts
newEnd := findLastNewline(content[:unclosedIdx], 200)
if newEnd <= 0 {
newEnd = findLastSpace(content[:unclosedIdx], 100)
}
if newEnd > 0 {
msgEnd = newEnd
} else {
// If we can't split before, we MUST split inside (last resort)
if unclosedIdx > 20 {
msgEnd = unclosedIdx
} else {
msgEnd = maxLen - 5
messages = append(messages, strings.TrimRight(content[:msgEnd], " \t\n\r")+"\n```")
content = strings.TrimSpace(header + "\n" + content[msgEnd:])
continue
Comment on lines +95 to +98
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above: strings.TrimSpace(header + "\n" + content[msgEnd:]) in this branch can strip meaningful leading indentation/whitespace from code content after the reopened fence, altering output. Prefer preserving the original whitespace (or only trimming the newline(s) introduced by the split).

Copilot uses AI. Check for mistakes.
}
}
}
}
}

if msgEnd <= 0 {
msgEnd = effectiveLimit
}
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a potential infinite loop when msgEnd is set to 0. This can occur on line 95 when unclosedIdx is 0 (code block starts at the beginning), or potentially from other code paths. When msgEnd is 0, line 112 performs content = strings.TrimSpace(content[msgEnd:]) which doesn't reduce the content length (it's just trimming the same string), causing an infinite loop. The guard on lines 107-109 sets msgEnd to effectiveLimit only if msgEnd <= 0, but this happens AFTER the code block handling logic which may have set msgEnd to 0 and then taken a continue path earlier. Consider adding validation that msgEnd must be > 0 before line 111, or ensure line 95 sets msgEnd to at least 1.

Suggested change
}
}
// Ensure msgEnd is a valid, positive index so the loop always makes progress.
if msgEnd <= 0 {
msgEnd = 1
} else if msgEnd > len(content) {
msgEnd = len(content)
}

Copilot uses AI. Check for mistakes.

messages = append(messages, content[:msgEnd])
content = strings.TrimSpace(content[msgEnd:])
Comment on lines +109 to +110
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The SplitMessage function uses byte-based indexing (string slicing) which can split multi-byte UTF-8 characters, potentially creating invalid UTF-8 sequences. When slicing at positions found by findLastNewline, findLastSpace, or at arbitrary positions like effectiveLimit or innerLimit, the function may split in the middle of a multi-byte character. Consider using rune-based indexing or validating split points to ensure they fall on character boundaries. The test case "Preserve Unicode characters" acknowledges this issue but doesn't verify the output is valid UTF-8.

Copilot uses AI. Check for mistakes.
}
Comment thread
huaaudio marked this conversation as resolved.

return messages
}
Comment on lines +7 to +114
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SplitMessage introduces non-trivial behavior (natural boundaries, code block fence handling, and the 500-char buffer) but there are no unit tests verifying chunk sizes, code-block integrity, and edge cases (e.g., long code blocks without closing fences, no whitespace, Unicode input). Given the repo has existing Go unit tests, please add focused tests in pkg/utils for this helper.

Copilot uses AI. Check for mistakes.
Comment thread
huaaudio marked this conversation as resolved.

// findLastUnclosedCodeBlock walks text left to right, treating every ``` as a
// toggle between "outside" and "inside" a code block. It returns the byte index
// of the fence that opened the block still open at the end of text, or -1 when
// all fences are paired.
func findLastUnclosedCodeBlock(text string) int {
	const fence = "```"
	openAt := -1 // start of the block currently open; -1 while outside any block

	for pos := 0; pos+len(fence) <= len(text); pos++ {
		if text[pos:pos+len(fence)] != fence {
			continue
		}
		if openAt >= 0 {
			// This fence closes the open block.
			openAt = -1
		} else {
			// This fence opens a new block.
			openAt = pos
		}
		pos += len(fence) - 1 // jump over the rest of the fence
	}

	return openAt
}

// findNextClosingCodeBlock searches text, beginning at startIdx, for the next
// ``` fence. The result is the index immediately after that fence, or -1 if
// there is no fence at or beyond startIdx.
func findNextClosingCodeBlock(text string, startIdx int) int {
	lastStart := len(text) - 3 // greatest index at which a full fence can begin

	for at := startIdx; at <= lastStart; at++ {
		if text[at:at+3] == "```" {
			return at + 3
		}
	}
	return -1
}

// findLastNewline searches the final searchWindow bytes of s, back to front,
// for a newline. It returns the index of the last '\n' inside that window, or
// -1 when the window holds no newline.
func findLastNewline(s string, searchWindow int) int {
	lowest := len(s) - searchWindow
	if lowest < 0 {
		// The window is larger than s: search the whole string.
		lowest = 0
	}

	idx := len(s) - 1
	for idx >= lowest {
		if s[idx] == '\n' {
			return idx
		}
		idx--
	}
	return -1
}

// findLastSpace searches the final searchWindow bytes of s, back to front, for
// horizontal whitespace (space or tab). It returns the index of the last such
// byte inside that window, or -1 when the window holds none.
func findLastSpace(s string, searchWindow int) int {
	lowest := 0
	if start := len(s) - searchWindow; start > 0 {
		lowest = start
	}

	// pos is one past the byte being inspected, so the scan covers
	// indices len(s)-1 down to lowest inclusive.
	for pos := len(s); pos > lowest; pos-- {
		if c := s[pos-1]; c == ' ' || c == '\t' {
			return pos - 1
		}
	}
	return -1
}