Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ dist/
# Windows Application Icon/Resource
*.syso

# Test telegram integration
cmd/telegram/

# Keep embedded backend dist directory placeholder in VCS
!web/backend/dist/
web/backend/dist/*
Expand Down
5 changes: 2 additions & 3 deletions config/config.example.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,8 @@
"token": "YOUR_TELEGRAM_BOT_TOKEN",
"base_url": "",
"proxy": "",
"allow_from": [
"YOUR_USER_ID"
],
"allow_from": ["YOUR_USER_ID"],
"use_markdown_v2": false,
"reasoning_channel_id": ""
},
"discord": {
Expand Down
6 changes: 5 additions & 1 deletion docs/chat-apps.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ Talk to your picoclaw through Telegram, Discord, WhatsApp, Matrix, QQ, DingTalk,
"telegram": {
"enabled": true,
"token": "YOUR_BOT_TOKEN",
"allow_from": ["YOUR_USER_ID"]
"allow_from": ["YOUR_USER_ID"],
"use_markdown_v2": false
}
}
}
Expand All @@ -63,6 +64,9 @@ Telegram command menu registration remains channel-local discovery UX; generic c

If command registration fails (network/API transient errors), the channel still starts and PicoClaw retries registration in the background.

**4. Advanced Formatting**
Set `"use_markdown_v2": true` in the `telegram` channel config to enable enhanced formatting. This allows the bot to use the full range of Telegram MarkdownV2 features, including nested styles, spoilers, and custom fixed-width blocks.

</details>

<details>
Expand Down
197 changes: 197 additions & 0 deletions pkg/channels/telegram/parse_markdown_to_md_v2.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
package telegram

import (
"regexp"
"strings"
)

// mdV2SpecialChars is the set of runes that escapeMarkdownV2 prefixes with a
// backslash when they appear in plain text destined for Telegram MarkdownV2.
// The backslash itself is a member so that a stray one is emitted as `\\`.
var mdV2SpecialChars = func() map[rune]bool {
	// Every member is a single ASCII character, so the set is spelled out
	// as one string and expanded into a lookup table at package init.
	const specials = "*_[]()~`><#+-=|{}.!\\"

	set := make(map[rune]bool, len(specials))
	for _, r := range specials {
		set[r] = true
	}
	return set
}()

// entityPattern describes one Telegram MarkdownV2 entity type.
type entityPattern struct {
	// re matches one complete span of this entity, delimiters included.
	re *regexp.Regexp
	// open is the leading delimiter. processText uses it both as the key
	// into verbatimEntities and as the prefix to strip before recursing.
	open string
	// close is the trailing delimiter stripped before recursing; it is
	// empty for entities that are never recursed into.
	close string
}

// allEntityPatterns lists every recognized entity, with longer / more
// specific delimiters first. processText runs each regex against the
// remaining text and keeps the leftmost match, preferring the longest span
// when two patterns start at the same offset.
var allEntityPatterns = []entityPattern{
	// fenced code block — content is completely verbatim
	{re: regexp.MustCompile("(?s)```(?:[\\w]*\\n)?[\\s\\S]*?```"), open: "```", close: "```"},
	// inline code — content is completely verbatim.
	// The pattern contains a backtick, so it cannot be a raw string; the
	// regex the engine sees is  `(?:[^`\\\n]|\\.)*`  . Backslash must be
	// excluded from the character class, otherwise the class swallows the
	// backslash of an escaped backtick (\`) and the span ends too early.
	{re: regexp.MustCompile("`(?:[^`\\\\\\n]|\\\\.)*`"), open: "`", close: "`"},
	// expandable block-quote opener **>…
	{re: regexp.MustCompile(`(?m)\*\*>(?:[^\n]*)`), open: "**>", close: ""},
	// block-quote line >…
	{re: regexp.MustCompile(`(?m)^>(?:[^\n]*)`), open: ">", close: ""},
	// custom emoji / timestamp ![…](…) — must come before plain link
	{re: regexp.MustCompile(`!\[[^\]]*\]\([^)]*\)`), open: "!", close: ""},
	// inline URL / user mention […](…)
	{re: regexp.MustCompile(`\[[^\]]*\]\([^)]*\)`), open: "[", close: ""},
	// spoiler ||…|| — before single | so it wins
	{re: regexp.MustCompile(`\|\|(?:[^|\\\n]|\\.)*\|\|`), open: "||", close: "||"},
	// underline __…__ — before single _ so it wins
	{re: regexp.MustCompile(`__(?:[^_\\\n]|\\.)*__`), open: "__", close: "__"},
	// bold *…*
	{re: regexp.MustCompile(`\*(?:[^*\\\n]|\\.)*\*`), open: "*", close: "*"},
	// italic _…_
	{re: regexp.MustCompile(`_(?:[^_\\\n]|\\.)*_`), open: "_", close: "_"},
	// strikethrough ~…~
	{re: regexp.MustCompile(`~(?:[^~\\\n]|\\.)*~`), open: "~", close: "~"},
}

// verbatimEntities is keyed by entityPattern.open. processText looks a
// matched pattern up here to decide whether the span is copied as-is
// instead of being split into delimiters and recursively processed inner
// content.
var verbatimEntities = map[string]bool{
	"`":   true, // inline code
	"```": true, // fenced code block
	">":   true, // block-quote line
	"**>": true, // expandable block-quote opener
	"[":   true, // inline URL / user mention
	"!":   true, // custom emoji / timestamp
}

// markdownToTelegramMarkdownV2 rewrites a Markdown string so it can be sent
// with Telegram's MarkdownV2 parse mode.
//
// The conversion runs in three passes:
//  1. Every match of reHeading is replaced by its first capture group,
//     escaped with escapeMarkdownV2 and wrapped in *…* (bold).
//  2. Every match of reBoldStar is rewritten to *$1*, turning **bold** into
//     Telegram's single-asterisk bold.
//  3. The result is handed to processText, which keeps recognized entity
//     spans and escapes special characters in the plain text around them.
//
// Reference: https://core.telegram.org/bots/api#formatting-options
func markdownToTelegramMarkdownV2(text string) string {
	// headingToBold receives one full heading match and returns the bold
	// replacement. The captured heading text is escaped in full — asterisks
	// included — so the surrounding *…* span stays well-formed.
	headingToBold := func(heading string) string {
		groups := reHeading.FindStringSubmatch(heading)
		if len(groups) >= 2 {
			return "*" + escapeMarkdownV2(groups[1]) + "*"
		}
		return heading
	}

	withHeadings := reHeading.ReplaceAllStringFunc(text, headingToBold)
	withBold := reBoldStar.ReplaceAllString(withHeadings, "*$1*")

	return processText(withBold)
}

// processText walks text from left to right. At each step it finds the
// leftmost entity match among allEntityPatterns (the longest span wins when
// several start at the same offset), escapes the plain text before it with
// escapeMarkdownV2, emits the entity, and continues after it. Text with no
// remaining entity is escaped in full. An empty input yields "".
//
// How an entity is emitted depends on its pattern:
//   - links and custom emoji ("[" / "!"): the bracketed text is processed
//     recursively and the "(...)" target is copied unchanged; see
//     processLinkSpan;
//   - other patterns listed in verbatimEntities: copied unchanged;
//   - everything else (bold, italic, underline, strikethrough, spoiler):
//     delimiters are kept and the inner content is processed recursively so
//     nested entities survive while stray specials are escaped.
func processText(text string) string {
	var b strings.Builder

	// The remainder is handled by looping rather than by a tail call, so
	// stack depth only grows with entity nesting, not with entity count.
	for text != "" {
		bestStart, bestEnd := -1, -1
		var bestPat *entityPattern

		for i := range allEntityPatterns {
			p := &allEntityPatterns[i]
			loc := p.re.FindStringIndex(text)
			if loc == nil {
				continue
			}
			if bestPat == nil || loc[0] < bestStart ||
				(loc[0] == bestStart && loc[1]-loc[0] > bestEnd-bestStart) {
				bestStart, bestEnd, bestPat = loc[0], loc[1], p
			}
		}

		if bestPat == nil {
			// No entity left — escape everything that remains.
			b.WriteString(escapeMarkdownV2(text))
			break
		}

		// Plain text before the entity (a no-op when bestStart == 0).
		b.WriteString(escapeMarkdownV2(text[:bestStart]))

		matched := text[bestStart:bestEnd]
		switch {
		case bestPat.open == "[" || bestPat.open == "!":
			// Checked before the verbatim lookup: the link text is ordinary
			// MarkdownV2 text and must have its special characters escaped.
			b.WriteString(processLinkSpan(matched))
		case verbatimEntities[bestPat.open]:
			// Code blocks and quotes: pass through completely untouched.
			b.WriteString(matched)
		default:
			inner := matched[len(bestPat.open) : len(matched)-len(bestPat.close)]
			b.WriteString(bestPat.open)
			b.WriteString(processText(inner))
			b.WriteString(bestPat.close)
		}

		text = text[bestEnd:]
	}

	return b.String()
}

// processLinkSpan takes a span of the form "[text](target)" or
// "![text](target)" and returns it with text run through processText and
// everything else copied unchanged. The link patterns do not allow "]"
// inside text, so the first "](" always separates text from target. A span
// that does not have the expected shape is returned as-is.
func processLinkSpan(span string) string {
	textStart := strings.IndexByte(span, '[') + 1
	textEnd := strings.Index(span, "](")
	if textStart == 0 || textEnd < textStart {
		return span
	}
	return span[:textStart] + processText(span[textStart:textEnd]) + span[textEnd:]
}

// escapeMarkdownV2 escapes every MarkdownV2 special character (see
// mdV2SpecialChars) in a plain-text segment, i.e. a segment that is not part
// of any recognized entity, and returns the escaped string.
//
// A backslash that already escapes a special character is forwarded together
// with that character to avoid double-escaping. A backslash in front of an
// ordinary character (as in `C:\Users`) is NOT an escape sequence: Telegram
// would consume it and drop it from the rendered text, so it is escaped
// itself and comes out as `\\`. A trailing backslash is escaped the same way.
func escapeMarkdownV2(s string) string {
	var b strings.Builder
	b.Grow(len(s) + 8)
	runes := []rune(s)
	for i := 0; i < len(runes); i++ {
		ch := runes[i]
		// Forward an existing escape sequence verbatim, but only when the
		// escaped rune actually is a special character.
		if ch == '\\' && i+1 < len(runes) && mdV2SpecialChars[runes[i+1]] {
			b.WriteRune(ch)
			b.WriteRune(runes[i+1])
			i++
			continue
		}
		if mdV2SpecialChars[ch] {
			b.WriteByte('\\')
		}
		b.WriteRune(ch)
	}
	return b.String()
}
68 changes: 68 additions & 0 deletions pkg/channels/telegram/parse_markdown_to_md_v2_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package telegram

import (
_ "embed"
"testing"

"github.com/stretchr/testify/require"
)

//go:embed testdata/md2_all_formats.txt
var md2AllFormats string

// Test_markdownToTelegramMarkdownV2 runs markdownToTelegramMarkdownV2 over a
// table of inputs and requires the exact converted output for each one.
// Every case carries a name so a failure is reported under a readable
// subtest instead of an auto-generated "#NN" label.
func Test_markdownToTelegramMarkdownV2(t *testing.T) {
	cases := []struct {
		name     string
		input    string
		expected string
	}{
		{
			name:     "heading -> bolding",
			input:    `## HeadingH2 #`,
			expected: "*HeadingH2 \\#*",
		},
		{
			name:     "strikethrough",
			input:    "~strikethroughMD~",
			expected: "~strikethroughMD~",
		},
		{
			name:     "inline URL",
			input:    "[inline URL](http://www.example.com/)",
			expected: "[inline URL](http://www.example.com/)",
		},
		{
			// The fixture is already valid MarkdownV2, so it must come back
			// unchanged.
			name:     "all telegram formats",
			input:    md2AllFormats,
			expected: md2AllFormats,
		},
		{
			name:     "empty",
			input:    "",
			expected: "",
		},
		{
			name:     "one letter",
			input:    "o",
			expected: "o",
		},
		{
			name:     "stray tilde inside bold is escaped",
			input:    "*Last update: ~10 24h*",
			expected: "*Last update: \\~10 24h*",
		},
		{
			name:     "angle brackets are escaped",
			input:    "<Market Capitalization>",
			expected: "\\<Market Capitalization\\>",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			actual := markdownToTelegramMarkdownV2(tc.input)

			require.EqualValues(t, tc.expected, actual)
		})
	}
}
Loading
Loading