Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 75 additions & 19 deletions pkg/channels/telegram.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"strings"
"sync"
"time"
"unicode/utf8"

th "github.com/mymmrac/telego/telegohandler"

Expand All @@ -24,6 +25,13 @@ import (
"github.com/sipeed/picoclaw/pkg/voice"
)

const (
// Telegram has a limit of 4096 characters per message.
// Use a conservative limit on the original content to account for HTML markup expansion.
//
// telegramMessageLimit is the hard cap; Send compares it against the rune
// count of the outgoing text and truncates anything longer.
telegramMessageLimit = 4096
// telegramSafeContentLength is the chunk size Send passes to
// utils.SplitMessageIter for the raw (pre-HTML) content.
// NOTE(review): the splitter compares this value against len(content), i.e.
// bytes rather than characters — confirm that is the intended unit.
telegramSafeContentLength = 3000
)

type TelegramChannel struct {
*BaseChannel
bot *telego.Bot
Expand Down Expand Up @@ -157,33 +165,81 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err
c.stopThinking.Delete(msg.ChatID)
}

htmlContent := markdownToTelegramHTML(msg.Content)
var (
placeholderID int
hasPlaceholder bool
)

// Try to edit placeholder
// Try to use placeholder (thinking...) message for the first chunk
if pID, ok := c.placeholders.Load(msg.ChatID); ok {
c.placeholders.Delete(msg.ChatID)
editMsg := tu.EditMessageText(tu.ID(chatID), pID.(int), htmlContent)
editMsg.ParseMode = telego.ModeHTML

if _, err = c.bot.EditMessageText(ctx, editMsg); err == nil {
return nil
if id, ok := pID.(int); ok {
placeholderID = id
hasPlaceholder = true
}
// Fallback to new message if edit fails
}

tgMsg := tu.Message(tu.ID(chatID), htmlContent)
tgMsg.ParseMode = telego.ModeHTML
chunkIndex := 0

if _, err = c.bot.SendMessage(ctx, tgMsg); err != nil {
logger.ErrorCF("telegram", "HTML parse failed, falling back to plain text", map[string]interface{}{
"error": err.Error(),
})
tgMsg.ParseMode = ""
_, err = c.bot.SendMessage(ctx, tgMsg)
return err
}
// Split long messages to stay under Telegram limits and avoid delivery failures.
sendErr := utils.SplitMessageIter(msg.Content, telegramSafeContentLength, func(chunk string) error {
htmlContent := markdownToTelegramHTML(chunk)

return nil
// First chunk: try to edit the existing placeholder message
if hasPlaceholder && chunkIndex == 0 {
editMsg := tu.EditMessageText(tu.ID(chatID), placeholderID, htmlContent)
editMsg.ParseMode = telego.ModeHTML

if _, err := c.bot.EditMessageText(ctx, editMsg); err == nil {
chunkIndex++
return nil
}

logger.WarnCF("telegram", "Failed to edit placeholder message, sending new message instead", map[string]interface{}{
"error": err.Error(),
})

// If edit fails, fall back to sending a new message for this and subsequent chunks
hasPlaceholder = false
Comment on lines +196 to +203
Copy link

Copilot AI Feb 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

err is referenced outside the scope where it’s declared: if _, err := c.bot.EditMessageText(...); ... defines err only inside the if, but logger.WarnCF(..., {"error": err.Error()}) uses it after the block. This will not compile. Capture the error into a variable (e.g., editErr := ...) before logging, or restructure the if to keep the error in scope.

Suggested change
}
logger.WarnCF("telegram", "Failed to edit placeholder message, sending new message instead", map[string]interface{}{
"error": err.Error(),
})
// If edit fails, fall back to sending a new message for this and subsequent chunks
hasPlaceholder = false
} else {
logger.WarnCF("telegram", "Failed to edit placeholder message, sending new message instead", map[string]interface{}{
"error": err.Error(),
})
// If edit fails, fall back to sending a new message for this and subsequent chunks
hasPlaceholder = false
}

Copilot uses AI. Check for mistakes.
}

tgMsg := tu.Message(tu.ID(chatID), htmlContent)
tgMsg.ParseMode = telego.ModeHTML

if utf8.RuneCountInString(tgMsg.Text) > telegramMessageLimit {
// As an extra safeguard, truncate if HTML expansion unexpectedly exceeds Telegram's hard limit.
runes := []rune(tgMsg.Text)
if len(runes) > telegramMessageLimit {
tgMsg.Text = string(runes[:telegramMessageLimit])
}
}

if _, err := c.bot.SendMessage(ctx, tgMsg); err != nil {
logger.ErrorCF("telegram", "Failed to send HTML message, falling back to plain text", map[string]interface{}{
"error": err.Error(),
})

// Fallback to plain text using the original chunk content
tgMsg.ParseMode = ""
tgMsg.Text = chunk

// Final safety: hard truncate plain text if still too long
if utf8.RuneCountInString(tgMsg.Text) > telegramMessageLimit {
runes := []rune(tgMsg.Text)
if len(runes) > telegramMessageLimit {
tgMsg.Text = string(runes[:telegramMessageLimit])
}
}

if _, err := c.bot.SendMessage(ctx, tgMsg); err != nil {
return err
}
}
chunkIndex++
return nil
})

return sendErr
}

func (c *TelegramChannel) handleMessage(ctx context.Context, message *telego.Message) error {
Expand Down
150 changes: 150 additions & 0 deletions pkg/utils/string.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package utils

import (
	"errors"
	"strings"
	"unicode/utf8"
)

// Truncate returns a truncated version of s with at most maxLen runes.
// Handles multi-byte Unicode characters properly.
// If the string is truncated, "..." is appended to indicate truncation.
Expand All @@ -14,3 +16,151 @@
}
return string(runes[:maxLen-3]) + "..."
}

// SplitMessage splits long messages into chunks, preserving code block integrity where possible.
// Logic is inspired by the Discord channel implementation and is channel-agnostic.
// This allocates a slice to hold all chunks; for streaming use SplitMessageIter.
func SplitMessage(content string, limit int) []string {

Check failure on line 23 in pkg/utils/string.go

View workflow job for this annotation

GitHub Actions / Linter

SplitMessage redeclared in this block

Check failure on line 23 in pkg/utils/string.go

View workflow job for this annotation

GitHub Actions / Vet

SplitMessage redeclared in this block

Check failure on line 23 in pkg/utils/string.go

View workflow job for this annotation

GitHub Actions / Tests

SplitMessage redeclared in this block
var messages []string

_ = SplitMessageIter(content, limit, func(chunk string) error {
messages = append(messages, chunk)
return nil
})

return messages
}

// SplitMessageIter splits content into chunks and calls cb for each chunk.
// This avoids allocating a slice to hold all chunks and is more memory-efficient for very large messages.
func SplitMessageIter(content string, limit int, cb func(chunk string) error) error {
content = strings.TrimSpace(content)
for len(content) > 0 {
if len(content) <= limit {
if content != "" {
if err := cb(content); err != nil {
Copy link

Copilot AI Feb 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SplitMessageIter will panic when limit <= 0 (due to content[:limit]) and will also panic if cb is nil. Since this is a shared utility, it should defensively validate inputs (e.g., return an error when limit <= 0 or cb == nil).

Copilot uses AI. Check for mistakes.
return err
}
}
break
}

msgEnd := limit

// Find natural split point within the limit
msgEnd = findLastNewline(content[:limit], 200)
if msgEnd <= 0 {
msgEnd = findLastSpace(content[:limit], 100)
}
if msgEnd <= 0 {
msgEnd = limit
}
Copy link

Copilot AI Feb 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SplitMessageIter slices strings using byte indices (content[:limit], content[:msgEnd]). If msgEnd falls in the middle of a multi-byte UTF-8 rune (e.g., when no newline/space is found and msgEnd = limit), the resulting chunk will contain invalid UTF-8. This can corrupt output and may break downstream processing (e.g., markdown conversion). Adjust msgEnd to a rune boundary (e.g., backtrack until utf8.ValidString(content[:msgEnd]) or use utf8.RuneStart) before slicing.

Copilot uses AI. Check for mistakes.

// Check if this would end with an incomplete code block
candidate := content[:msgEnd]
unclosedIdx := findLastUnclosedCodeBlock(candidate)

if unclosedIdx >= 0 {
// Message would end with incomplete code block
// Try to extend to include the closing ``` (with some buffer)
extendedLimit := limit + 500 // Allow buffer for code blocks
if len(content) > extendedLimit {
closingIdx := findNextClosingCodeBlock(content, msgEnd)
if closingIdx > 0 && closingIdx <= extendedLimit {
// Extend to include the closing ```
msgEnd = closingIdx
} else {
// Can't find closing, split before the code block
msgEnd = findLastNewline(content[:unclosedIdx], 200)
if msgEnd <= 0 {
msgEnd = findLastSpace(content[:unclosedIdx], 100)
}
if msgEnd <= 0 {
msgEnd = unclosedIdx
}
}
} else {
// Remaining content fits within extended limit
msgEnd = len(content)
}
}

if msgEnd <= 0 {
msgEnd = limit
}

chunk := strings.TrimSpace(content[:msgEnd])
if chunk != "" {
if err := cb(chunk); err != nil {
return err
}
}
content = strings.TrimSpace(content[msgEnd:])
}
Copy link

Copilot AI Feb 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SplitMessageIter uses strings.TrimSpace on the full content and on every produced chunk. This changes the message content (removes leading/trailing whitespace/newlines), which can alter formatting (notably indentation in code blocks / markdown lists) and makes the splitter non-lossless. To preserve original content, avoid trimming the emitted chunks; if you need to prevent empty/whitespace-only chunks, consider trimming only for the emptiness check or trimming only the boundary newline without removing indentation.

Copilot uses AI. Check for mistakes.

return nil
}

// findLastUnclosedCodeBlock reports where the trailing unclosed ``` code
// block of text begins.
//
// Fences are matched left to right without overlap and toggle an "inside a
// code block" state: the first opens a block, the next closes it, and so on.
// It returns the byte index of the opening fence that is still open at the
// end of text, or -1 if every block is closed (or there are no fences).
//
// NOTE(review): CI (lint, vet and tests) reported "findLastUnclosedCodeBlock
// redeclared in this block" for this definition — confirm that no other file
// in this package already declares it.
func findLastUnclosedCodeBlock(text string) int {
	const fence = "```"

	lastOpenIdx := -1
	inCodeBlock := false

	for pos := 0; pos < len(text); {
		rel := strings.Index(text[pos:], fence)
		if rel < 0 {
			break
		}
		idx := pos + rel
		if !inCodeBlock {
			// Remember every opening fence, not just the first one, so the
			// index returned belongs to the block that is actually open.
			lastOpenIdx = idx
		}
		inCodeBlock = !inCodeBlock
		pos = idx + len(fence)
	}

	if inCodeBlock {
		return lastOpenIdx
	}
	return -1
}

// findNextClosingCodeBlock finds the next ``` fence in text at or after byte
// offset startIdx.
//
// It returns the byte index just past that fence, or -1 if there is none. A
// negative startIdx is treated as 0, and a startIdx at or beyond len(text)
// yields -1.
//
// NOTE(review): CI (lint, vet and tests) reported "findNextClosingCodeBlock
// redeclared in this block" for this definition — confirm that no other file
// in this package already declares it.
func findNextClosingCodeBlock(text string, startIdx int) int {
	const fence = "```"

	if startIdx < 0 {
		startIdx = 0
	}
	if startIdx >= len(text) {
		return -1
	}

	rel := strings.Index(text[startIdx:], fence)
	if rel < 0 {
		return -1
	}
	return startIdx + rel + len(fence)
}

// findLastNewline finds the last newline ('\n') within the final
// searchWindow bytes of s.
//
// It returns the byte index of that newline relative to the start of s, or -1
// if the window contains no newline. A searchWindow larger than len(s)
// searches the whole string; a searchWindow of zero or less searches nothing
// and yields -1.
//
// NOTE(review): CI (lint, vet and tests) reported "findLastNewline redeclared
// in this block" for this definition — confirm that no other file in this
// package already declares it.
func findLastNewline(s string, searchWindow int) int {
	if searchWindow <= 0 {
		return -1
	}

	searchStart := len(s) - searchWindow
	if searchStart < 0 {
		searchStart = 0
	}

	if idx := strings.LastIndexByte(s[searchStart:], '\n'); idx >= 0 {
		return searchStart + idx
	}
	return -1
}

// findLastSpace finds the last space character within the last N characters.
// Returns the position of the space or -1 if not found.
func findLastSpace(s string, searchWindow int) int {

Check failure on line 155 in pkg/utils/string.go

View workflow job for this annotation

GitHub Actions / Linter

findLastSpace redeclared in this block

Check failure on line 155 in pkg/utils/string.go

View workflow job for this annotation

GitHub Actions / Vet

findLastSpace redeclared in this block

Check failure on line 155 in pkg/utils/string.go

View workflow job for this annotation

GitHub Actions / Tests

findLastSpace redeclared in this block
searchStart := len(s) - searchWindow
if searchStart < 0 {
searchStart = 0
}
for i := len(s) - 1; i >= searchStart; i-- {
if s[i] == ' ' || s[i] == '\t' {
return i
}
}
return -1
}
Loading