From 0c117a073f3d2ca66039d9d2f89bd9d0e3e9ec5a Mon Sep 17 00:00:00 2001 From: afjcjsbx Date: Sat, 7 Mar 2026 15:49:33 +0100 Subject: [PATCH 01/13] feat(channel): echo voice audio transcription --- config/config.example.json | 3 +++ pkg/agent/loop.go | 42 +++++++++++++++++++++++++++++++ pkg/bus/types.go | 9 ++++--- pkg/channels/base.go | 6 ----- pkg/channels/manager.go | 23 ++++++++++++----- pkg/channels/telegram/telegram.go | 8 ++++++ pkg/config/config.go | 5 ++++ pkg/config/defaults.go | 3 +++ 8 files changed, 84 insertions(+), 15 deletions(-) diff --git a/config/config.example.json b/config/config.example.json index 2f643d41bf..21b65a4793 100644 --- a/config/config.example.json +++ b/config/config.example.json @@ -419,6 +419,9 @@ "enabled": false, "monitor_usb": true }, + "voice": { + "echo_transcription": false + }, "gateway": { "host": "127.0.0.1", "port": 18790 diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index 19d13b2bb8..e91be71bc3 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -438,6 +438,8 @@ func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.Inbou transcriptions = append(transcriptions, result.Text) } + al.sendTranscriptionFeedback(msg.Channel, msg.ChatID, msg.MessageID, transcriptions) + if len(transcriptions) == 0 { return msg } @@ -462,6 +464,37 @@ func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.Inbou return msg } +// sendTranscriptionFeedback Asynchronously sends feedback to the user +// with the result of audio transcription if the option is enabled. +func (al *AgentLoop) sendTranscriptionFeedback(channel, chatID string, messageID string, validTexts []string) { + if !al.cfg.Voice.EchoTranscription { + return + } + + go func() { + pubCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + var feedbackMsg string + if len(validTexts) > 0 { + feedbackMsg = "Transcript: " + strings.Join(validTexts, "\n") + } else { + feedbackMsg = "No voice detected in the audio" + } + + err := al.bus.PublishOutbound(pubCtx, bus.OutboundMessage{ + Channel: channel, + ChatID: chatID, + Content: feedbackMsg, + ReplyToMessageID: messageID, + SkipPlaceholder: true, // It serves to avoid consuming the message "Thinking..." + }) + if err != nil { + logger.WarnCF("voice", "Failed to send transcription feedback", map[string]any{"error": err.Error()}) + } + }() +} + // inferMediaType determines the media type ("image", "audio", "video", "file") // from a filename and MIME content type. func inferMediaType(filename, contentType string) string { @@ -764,6 +797,15 @@ func (al *AgentLoop) runAgentLoop( // 2. Save user message to session agent.Sessions.AddMessage(opts.SessionKey, "user", opts.UserMessage) + // thinking message only for channels, not for background tasks + if opts.Channel != "" && opts.ChatID != "" && !constants.IsInternalChannel(opts.Channel) && !opts.NoHistory { + al.bus.PublishOutbound(ctx, bus.OutboundMessage{ + Channel: opts.Channel, + ChatID: opts.ChatID, + TriggerPlaceholder: true, + }) + } + // 3. Run LLM iteration loop finalContent, iteration, err := al.runLLMIteration(ctx, agent, messages, opts) if err != nil { diff --git a/pkg/bus/types.go b/pkg/bus/types.go index 7ad8f04179..7b7335327c 100644 --- a/pkg/bus/types.go +++ b/pkg/bus/types.go @@ -30,9 +30,12 @@ type InboundMessage struct { } type OutboundMessage struct { - Channel string `json:"channel"` - ChatID string `json:"chat_id"` - Content string `json:"content"` + Channel string `json:"channel"` + ChatID string `json:"chat_id"` + Content string `json:"content"` + ReplyToMessageID string `json:"reply_to_message_id,omitempty"` + SkipPlaceholder bool `json:"skip_placeholder,omitempty"` // Tells Manager not to use Thinking + TriggerPlaceholder bool `json:"trigger_placeholder,omitempty"` } // MediaPart describes a single media attachment to send. diff --git a/pkg/channels/base.go b/pkg/channels/base.go index 063a66523d..334dc92540 100644 --- a/pkg/channels/base.go +++ b/pkg/channels/base.go @@ -284,12 +284,6 @@ func (c *BaseChannel) HandleMessage( c.placeholderRecorder.RecordReactionUndo(c.name, chatID, undo) } } - // Placeholder — independent pipeline - if pc, ok := c.owner.(PlaceholderCapable); ok { - if phID, err := pc.SendPlaceholder(ctx, chatID); err == nil && phID != "" { - c.placeholderRecorder.RecordPlaceholder(c.name, chatID, phID) - } - } } if err := c.bus.PublishInbound(ctx, msg); err != nil { diff --git a/pkg/channels/manager.go b/pkg/channels/manager.go index fdd6d0c1f0..84def63937 100644 --- a/pkg/channels/manager.go +++ b/pkg/channels/manager.go @@ -133,13 +133,15 @@ func (m *Manager) preSend(ctx context.Context, name string, msg bus.OutboundMess } // 3. Try editing placeholder - if v, loaded := m.placeholders.LoadAndDelete(key); loaded { - if entry, ok := v.(placeholderEntry); ok && entry.id != "" { - if editor, ok := ch.(MessageEditor); ok { - if err := editor.EditMessage(ctx, msg.ChatID, entry.id, msg.Content); err == nil { - return true // edited successfully, skip Send + if !msg.SkipPlaceholder { + if v, loaded := m.placeholders.LoadAndDelete(key); loaded { + if entry, ok := v.(placeholderEntry); ok && entry.id != "" { + if editor, ok := ch.(MessageEditor); ok { + if err := editor.EditMessage(ctx, msg.ChatID, entry.id, msg.Content); err == nil { + return true // edited successfully, skip Send + } + // edit failed → fall through to normal Send } - // edit failed → fall through to normal Send } } } @@ -493,6 +495,15 @@ func (m *Manager) sendWithRetry(ctx context.Context, name string, w *channelWork return } + if msg.TriggerPlaceholder { + if pc, ok := w.ch.(PlaceholderCapable); ok { + if phID, err := pc.SendPlaceholder(ctx, msg.ChatID); err == nil && phID != "" { + m.RecordPlaceholder(name, msg.ChatID, phID) + } + } + return + } + // Pre-send: stop typing and try to edit placeholder if m.preSend(ctx, name, msg, w.ch) { return // placeholder was edited successfully, skip Send diff --git a/pkg/channels/telegram/telegram.go b/pkg/channels/telegram/telegram.go index a2035853c0..1d87572938 100644 --- a/pkg/channels/telegram/telegram.go +++ b/pkg/channels/telegram/telegram.go @@ -179,6 +179,14 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err tgMsg := tu.Message(tu.ID(chatID), htmlContent) tgMsg.ParseMode = telego.ModeHTML + if msg.ReplyToMessageID != "" { + if mid, err := strconv.Atoi(msg.ReplyToMessageID); err == nil { + tgMsg.ReplyParameters = &telego.ReplyParameters{ + MessageID: mid, + } + } + } + if _, err = c.bot.SendMessage(ctx, tgMsg); err != nil { logger.ErrorCF("telegram", "HTML parse failed, falling back to plain text", map[string]any{ "error": err.Error(), diff --git a/pkg/config/config.go b/pkg/config/config.go index 72af3e2fb4..8d834229cc 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -58,6 +58,7 @@ type Config struct { Tools ToolsConfig `json:"tools"` Heartbeat HeartbeatConfig `json:"heartbeat"` Devices DevicesConfig `json:"devices"` + Voice VoiceConfig `json:"voice"` } // MarshalJSON implements custom JSON marshaling for Config @@ -424,6 +425,10 @@ type DevicesConfig struct { MonitorUSB bool `json:"monitor_usb" env:"PICOCLAW_DEVICES_MONITOR_USB"` } +type VoiceConfig struct { + EchoTranscription bool `json:"echo_transcription" env:"PICOCLAW_VOICE_ECHO_TRANSCRIPTION"` +} + type ProvidersConfig struct { Anthropic ProviderConfig `json:"anthropic"` OpenAI OpenAIProviderConfig `json:"openai"` diff --git a/pkg/config/defaults.go b/pkg/config/defaults.go index 1902480c59..7b690137ae 100644 --- a/pkg/config/defaults.go +++ b/pkg/config/defaults.go @@ -461,5 +461,8 @@ func DefaultConfig() *Config { Enabled: false, MonitorUSB: true, }, + Voice: VoiceConfig{ + EchoTranscription: false, + }, } } From 48d8c8738d3e48fe2d249bb0fecbbcff2d1e01d3 Mon Sep 17 00:00:00 2001 From: afjcjsbx Date: Sat, 7 Mar 2026 16:18:53 +0100 Subject: [PATCH 02/13] discord reply message on transcript echo --- pkg/channels/discord/discord.go | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/pkg/channels/discord/discord.go b/pkg/channels/discord/discord.go index c3bcbff8de..fbfcad1513 100644 --- a/pkg/channels/discord/discord.go +++ b/pkg/channels/discord/discord.go @@ -134,7 +134,7 @@ func (c *DiscordChannel) Send(ctx context.Context, msg bus.OutboundMessage) erro return nil } - return c.sendChunk(ctx, channelID, msg.Content) + return c.sendChunk(ctx, channelID, msg.Content, msg.ReplyToMessageID) } // SendMedia implements the channels.MediaSender interface. @@ -259,14 +259,29 @@ func (c *DiscordChannel) SendPlaceholder(ctx context.Context, chatID string) (st return msg.ID, nil } -func (c *DiscordChannel) sendChunk(ctx context.Context, channelID, content string) error { +func (c *DiscordChannel) sendChunk(ctx context.Context, channelID, content, replyToID string) error { // Use the passed ctx for timeout control sendCtx, cancel := context.WithTimeout(ctx, sendTimeout) defer cancel() done := make(chan error, 1) go func() { - _, err := c.session.ChannelMessageSend(channelID, content) + var err error + + // If we have an ID, we send the message as "Reply" + if replyToID != "" { + _, err = c.session.ChannelMessageSendComplex(channelID, &discordgo.MessageSend{ + Content: content, + Reference: &discordgo.MessageReference{ + MessageID: replyToID, + ChannelID: channelID, + }, + }) + } else { + // Otherwise, we send a normal message + _, err = c.session.ChannelMessageSend(channelID, content) + } + done <- err }() From 68bdf661680a89def5d6c7838d224230f1bd4a2a Mon Sep 17 00:00:00 2001 From: afjcjsbx Date: Sat, 7 Mar 2026 16:24:49 +0100 Subject: [PATCH 03/13] fix lint --- pkg/channels/telegram/telegram.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/channels/telegram/telegram.go b/pkg/channels/telegram/telegram.go index 1d87572938..fc6c723619 100644 --- a/pkg/channels/telegram/telegram.go +++ b/pkg/channels/telegram/telegram.go @@ -180,7 +180,7 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err tgMsg.ParseMode = telego.ModeHTML if msg.ReplyToMessageID != "" { - if mid, err := strconv.Atoi(msg.ReplyToMessageID); err == nil { + if mid, parseErr := strconv.Atoi(msg.ReplyToMessageID); parseErr == nil { tgMsg.ReplyParameters = &telego.ReplyParameters{ MessageID: mid, } From a0591f0c08beb6152a95f6a664e4a77b087a43b5 Mon Sep 17 00:00:00 2001 From: afjcjsbx Date: Sat, 7 Mar 2026 16:40:26 +0100 Subject: [PATCH 04/13] unit test placeholder logic --- pkg/channels/manager_test.go | 94 +++++++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 1 deletion(-) diff --git a/pkg/channels/manager_test.go b/pkg/channels/manager_test.go index f09ecfe2fc..f4af0d01aa 100644 --- a/pkg/channels/manager_test.go +++ b/pkg/channels/manager_test.go @@ -17,16 +17,31 @@ import ( // mockChannel is a test double that delegates Send to a configurable function. type mockChannel struct { BaseChannel - sendFn func(ctx context.Context, msg bus.OutboundMessage) error + sendFn func(ctx context.Context, msg bus.OutboundMessage) error + sentMessages []bus.OutboundMessage + placeholdersSent int + editedMessages int + lastPlaceholderID string } func (m *mockChannel) Send(ctx context.Context, msg bus.OutboundMessage) error { + m.sentMessages = append(m.sentMessages, msg) return m.sendFn(ctx, msg) } func (m *mockChannel) Start(ctx context.Context) error { return nil } func (m *mockChannel) Stop(ctx context.Context) error { return nil } +func (m *mockChannel) SendPlaceholder(ctx context.Context, chatID string) (string, error) { + m.placeholdersSent++ + m.lastPlaceholderID = "mock-ph-123" + return m.lastPlaceholderID, nil +} +func (m *mockChannel) EditMessage(ctx context.Context, chatID, messageID, content string) error { + m.editedMessages++ + return nil +} + // newTestManager creates a minimal Manager suitable for unit tests. func newTestManager() *Manager { return &Manager{ @@ -860,3 +875,80 @@ func TestBuildMediaScope_WithMessageID(t *testing.T) { t.Fatalf("expected %s, got %s", expected, scope) } } + +func TestManager_PlaceholderLogic(t *testing.T) { + mgr := &Manager{ + channels: make(map[string]Channel), + workers: make(map[string]*channelWorker), + placeholders: sync.Map{}, + } + + mockCh := &mockChannel{ + sendFn: func(ctx context.Context, msg bus.OutboundMessage) error { + return nil + }, + } + worker := newChannelWorker("mock", mockCh) + mgr.channels["mock"] = mockCh + mgr.workers["mock"] = worker + + ctx := context.Background() + + // Scenario 1: TriggerPlaceholder creates a placeholder but does NOT send text messages + msgTrigger := bus.OutboundMessage{ + Channel: "mock", + ChatID: "chat-1", + TriggerPlaceholder: true, + } + mgr.sendWithRetry(ctx, "mock", worker, msgTrigger) + + if mockCh.placeholdersSent != 1 { + t.Errorf("expected 1 placeholder sent, got %d", mockCh.placeholdersSent) + } + if len(mockCh.sentMessages) != 0 { + t.Errorf("expected 0 normal messages sent, got %d", len(mockCh.sentMessages)) + } + + // Verify that the placeholder has been registered in the manager + key := "mock:chat-1" + if _, ok := mgr.placeholders.Load(key); !ok { + t.Errorf("expected placeholder to be recorded in manager") + } + + // Scenario 2: SkipPlaceholder (simulates transcription). Must send normally, ignoring Edit. + msgSkip := bus.OutboundMessage{ + Channel: "mock", + ChatID: "chat-1", + Content: "Transcript: hello", + SkipPlaceholder: true, + } + mgr.sendWithRetry(ctx, "mock", worker, msgSkip) + + if mockCh.editedMessages != 0 { + t.Errorf("expected 0 edited messages due to SkipPlaceholder, got %d", mockCh.editedMessages) + } + if len(mockCh.sentMessages) != 1 { + t.Errorf("expected 1 normal message sent, got %d", len(mockCh.sentMessages)) + } + + // The placeholder must still exist for the next response + if _, ok := mgr.placeholders.Load(key); !ok { + t.Errorf("expected placeholder to STILL be in manager after SkipPlaceholder") + } + + // Scenario 3: Normal Message (simulates the final LLM response). Must consume the placeholder. + msgFinal := bus.OutboundMessage{ + Channel: "mock", + ChatID: "chat-1", + Content: "Final Answer", + } + mgr.sendWithRetry(ctx, "mock", worker, msgFinal) + + if mockCh.editedMessages != 1 { + t.Errorf("expected 1 edited message (consuming placeholder), got %d", mockCh.editedMessages) + } + // The placeholder must have been removed + if _, ok := mgr.placeholders.Load(key); ok { + t.Errorf("expected placeholder to be removed after being consumed") + } +} From 73243c901484c44da412aa8add4e494f274a7ead Mon Sep 17 00:00:00 2001 From: afjcjsbx Date: Sat, 7 Mar 2026 16:45:21 +0100 Subject: [PATCH 05/13] fix lint --- pkg/channels/manager_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/channels/manager_test.go b/pkg/channels/manager_test.go index f4af0d01aa..223c4f4de8 100644 --- a/pkg/channels/manager_test.go +++ b/pkg/channels/manager_test.go @@ -37,6 +37,7 @@ func (m *mockChannel) SendPlaceholder(ctx context.Context, chatID string) (strin m.lastPlaceholderID = "mock-ph-123" return m.lastPlaceholderID, nil } + func (m *mockChannel) EditMessage(ctx context.Context, chatID, messageID, content string) error { m.editedMessages++ return nil From 2effc2b4bdbbd07ee47639a36fbe41c25879bb45 Mon Sep 17 00:00:00 2001 From: afjcjsbx Date: Sat, 7 Mar 2026 18:47:22 +0100 Subject: [PATCH 06/13] slack reply message with audio transcription --- pkg/channels/slack/slack.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pkg/channels/slack/slack.go b/pkg/channels/slack/slack.go index 024b1b0237..640a9ef64d 100644 --- a/pkg/channels/slack/slack.go +++ b/pkg/channels/slack/slack.go @@ -122,6 +122,14 @@ func (c *SlackChannel) Send(ctx context.Context, msg bus.OutboundMessage) error slack.MsgOptionText(msg.Content, false), } + if msg.ReplyToMessageID != "" && threadTS == "" { + // Answer to the message by creating a Thread under it + opts = append(opts, slack.MsgOptionTS(msg.ReplyToMessageID)) + } else if threadTS != "" { + // If we are already in a thread, continue in the thread + opts = append(opts, slack.MsgOptionTS(threadTS)) + } + if threadTS != "" { opts = append(opts, slack.MsgOptionTS(threadTS)) } From 5b1f11aaf6c3b5afbbb5c54a2166db0c4c5eb6f2 Mon Sep 17 00:00:00 2001 From: afjcjsbx Date: Sat, 7 Mar 2026 18:56:38 +0100 Subject: [PATCH 07/13] resolve conflicts --- pkg/channels/telegram/telegram.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pkg/channels/telegram/telegram.go b/pkg/channels/telegram/telegram.go index 0e6876de68..8c072f083a 100644 --- a/pkg/channels/telegram/telegram.go +++ b/pkg/channels/telegram/telegram.go @@ -210,7 +210,12 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err // sendHTMLChunk sends a single HTML message, falling back to the original // markdown as plain text on parse failure so users never see raw HTML tags. -func (c *TelegramChannel) sendHTMLChunk(ctx context.Context, chatID int64, htmlContent, mdFallback, replyToID string) error { tgMsg := tu.Message(tu.ID(chatID), htmlContent) +func (c *TelegramChannel) sendHTMLChunk( + ctx context.Context, + chatID int64, + htmlContent, mdFallback, replyToID string, +) error { + tgMsg := tu.Message(tu.ID(chatID), htmlContent) tgMsg.ParseMode = telego.ModeHTML if replyToID != "" { From 3b5d04956ef0fe61e39dca2a343b278c12788e27 Mon Sep 17 00:00:00 2001 From: afjcjsbx Date: Sun, 8 Mar 2026 17:41:53 +0100 Subject: [PATCH 08/13] fixed double message on slack thread --- pkg/channels/slack/slack.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pkg/channels/slack/slack.go b/pkg/channels/slack/slack.go index 640a9ef64d..3ee8496213 100644 --- a/pkg/channels/slack/slack.go +++ b/pkg/channels/slack/slack.go @@ -130,10 +130,6 @@ func (c *SlackChannel) Send(ctx context.Context, msg bus.OutboundMessage) error opts = append(opts, slack.MsgOptionTS(threadTS)) } - if threadTS != "" { - opts = append(opts, slack.MsgOptionTS(threadTS)) - } - _, _, err := c.api.PostMessageContext(ctx, channelID, opts...) if err != nil { return fmt.Errorf("slack send: %w", channels.ErrTemporary) From f219ca1263278c7117480f488b7489249010fd6e Mon Sep 17 00:00:00 2001 From: afjcjsbx Date: Sun, 8 Mar 2026 17:57:43 +0100 Subject: [PATCH 09/13] telegram reply only on first message --- pkg/channels/telegram/telegram.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/channels/telegram/telegram.go b/pkg/channels/telegram/telegram.go index 8c072f083a..25811c659d 100644 --- a/pkg/channels/telegram/telegram.go +++ b/pkg/channels/telegram/telegram.go @@ -180,6 +180,7 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err // The Manager already splits messages to ≤4000 chars (WithMaxMessageLength), // so msg.Content is guaranteed to be within that limit. We still need to // check if HTML expansion pushes it beyond Telegram's 4096-char API limit. + replyToID := msg.ReplyToMessageID queue := []string{msg.Content} for len(queue) > 0 { chunk := queue[0] @@ -200,9 +201,11 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err continue } - if err := c.sendHTMLChunk(ctx, chatID, htmlContent, chunk, msg.ReplyToMessageID); err != nil { + if err := c.sendHTMLChunk(ctx, chatID, htmlContent, chunk, replyToID); err != nil { return err } + // Only the first chunk should be a reply; subsequent chunks are normal messages. + replyToID = "" } return nil From f87ab99833f36b9df1f16f7eb9b76691bbac7e42 Mon Sep 17 00:00:00 2001 From: afjcjsbx Date: Sun, 8 Mar 2026 18:00:02 +0100 Subject: [PATCH 10/13] fix empty strings on failed transcription --- pkg/agent/loop.go | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index 4d0c24c301..fa521def2d 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -438,12 +438,12 @@ func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.Inbou transcriptions = append(transcriptions, result.Text) } - al.sendTranscriptionFeedback(msg.Channel, msg.ChatID, msg.MessageID, transcriptions) - if len(transcriptions) == 0 { return msg } + al.sendTranscriptionFeedback(msg.Channel, msg.ChatID, msg.MessageID, transcriptions) + // Replace audio annotations sequentially with transcriptions. idx := 0 newContent := audioAnnotationRe.ReplaceAllStringFunc(msg.Content, func(match string) string { @@ -475,9 +475,16 @@ func (al *AgentLoop) sendTranscriptionFeedback(channel, chatID string, messageID pubCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() + var nonEmpty []string + for _, t := range validTexts { + if t != "" { + nonEmpty = append(nonEmpty, t) + } + } + var feedbackMsg string - if len(validTexts) > 0 { - feedbackMsg = "Transcript: " + strings.Join(validTexts, "\n") + if len(nonEmpty) > 0 { + feedbackMsg = "Transcript: " + strings.Join(nonEmpty, "\n") } else { feedbackMsg = "No voice detected in the audio" } From 536e26aff155bf2191a5e9208ab311a6ab26f50e Mon Sep 17 00:00:00 2001 From: afjcjsbx Date: Sun, 8 Mar 2026 18:22:15 +0100 Subject: [PATCH 11/13] Removed the old heavy logic --- pkg/agent/loop.go | 98 ++++++++++++++++++++---------------- pkg/bus/types.go | 10 ++-- pkg/channels/base.go | 15 ++++++ pkg/channels/manager.go | 44 +++++++++------- pkg/channels/manager_test.go | 96 +++++++++++++++++++++-------------- 5 files changed, 158 insertions(+), 105 deletions(-) diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index fa521def2d..d98191f333 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -413,9 +413,10 @@ var audioAnnotationRe = regexp.MustCompile(`\[(voice|audio)(?::[^\]]*)?\]`) // transcribeAudioInMessage resolves audio media refs, transcribes them, and // replaces audio annotations in msg.Content with the transcribed text. -func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.InboundMessage) bus.InboundMessage { +// Returns the (possibly modified) message and true if audio was transcribed. +func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.InboundMessage) (bus.InboundMessage, bool) { if al.transcriber == nil || al.mediaStore == nil || len(msg.Media) == 0 { - return msg + return msg, false } // Transcribe each audio media ref in order. @@ -439,10 +440,10 @@ func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.Inbou } if len(transcriptions) == 0 { - return msg + return msg, false } - al.sendTranscriptionFeedback(msg.Channel, msg.ChatID, msg.MessageID, transcriptions) + al.sendTranscriptionFeedback(ctx, msg.Channel, msg.ChatID, msg.MessageID, transcriptions) // Replace audio annotations sequentially with transcriptions. idx := 0 @@ -461,45 +462,56 @@ func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.Inbou } msg.Content = newContent - return msg + return msg, true } -// sendTranscriptionFeedback Asynchronously sends feedback to the user -// with the result of audio transcription if the option is enabled. -func (al *AgentLoop) sendTranscriptionFeedback(channel, chatID string, messageID string, validTexts []string) { +// sendTranscriptionFeedback sends feedback to the user with the result of +// audio transcription if the option is enabled. It sends the message directly +// through the channel (bypassing the bus queue) so that ordering with the +// subsequent placeholder is guaranteed. +func (al *AgentLoop) sendTranscriptionFeedback( + ctx context.Context, + channel, chatID, messageID string, + validTexts []string, +) { if !al.cfg.Voice.EchoTranscription { return } + if al.channelManager == nil { + return + } - go func() { - pubCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - var nonEmpty []string - for _, t := range validTexts { - if t != "" { - nonEmpty = append(nonEmpty, t) - } + var nonEmpty []string + for _, t := range validTexts { + if t != "" { + nonEmpty = append(nonEmpty, t) } + } - var feedbackMsg string - if len(nonEmpty) > 0 { - feedbackMsg = "Transcript: " + strings.Join(nonEmpty, "\n") - } else { - feedbackMsg = "No voice detected in the audio" - } + var feedbackMsg string + if len(nonEmpty) > 0 { + feedbackMsg = "Transcript: " + strings.Join(nonEmpty, "\n") + } else { + feedbackMsg = "No voice detected in the audio" + } - err := al.bus.PublishOutbound(pubCtx, bus.OutboundMessage{ - Channel: channel, - ChatID: chatID, - Content: feedbackMsg, - ReplyToMessageID: messageID, - SkipPlaceholder: true, // It serves to avoid consuming the message "Thinking..." - }) - if err != nil { - logger.WarnCF("voice", "Failed to send transcription feedback", map[string]any{"error": err.Error()}) - } - }() + ch, ok := al.channelManager.GetChannel(channel) + if !ok { + return + } + + sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + + err := ch.Send(sendCtx, bus.OutboundMessage{ + Channel: channel, + ChatID: chatID, + Content: feedbackMsg, + ReplyToMessageID: messageID, + }) + if err != nil { + logger.WarnCF("voice", "Failed to send transcription feedback", map[string]any{"error": err.Error()}) + } } // inferMediaType determines the media type ("image", "audio", "video", "file") @@ -613,7 +625,14 @@ func (al *AgentLoop) processMessage(ctx context.Context, msg bus.InboundMessage) }, ) - msg = al.transcribeAudioInMessage(ctx, msg) + var hadAudio bool + msg, hadAudio = al.transcribeAudioInMessage(ctx, msg) + + // For audio messages the placeholder was deferred by the channel. + // Now that transcription (and optional feedback) is done, send it. + if hadAudio && al.channelManager != nil { + al.channelManager.SendPlaceholder(ctx, msg.Channel, msg.ChatID) + } // Route system messages to processSystemMessage if msg.Channel == "system" { @@ -803,15 +822,6 @@ func (al *AgentLoop) runAgentLoop( // 2. Save user message to session agent.Sessions.AddMessage(opts.SessionKey, "user", opts.UserMessage) - // thinking message only for channels, not for background tasks - if opts.Channel != "" && opts.ChatID != "" && !constants.IsInternalChannel(opts.Channel) && !opts.NoHistory { - al.bus.PublishOutbound(ctx, bus.OutboundMessage{ - Channel: opts.Channel, - ChatID: opts.ChatID, - TriggerPlaceholder: true, - }) - } - // 3. Run LLM iteration loop finalContent, iteration, err := al.runLLMIteration(ctx, agent, messages, opts) if err != nil { diff --git a/pkg/bus/types.go b/pkg/bus/types.go index 7b7335327c..12da3f1dd0 100644 --- a/pkg/bus/types.go +++ b/pkg/bus/types.go @@ -30,12 +30,10 @@ type InboundMessage struct { } type OutboundMessage struct { - Channel string `json:"channel"` - ChatID string `json:"chat_id"` - Content string `json:"content"` - ReplyToMessageID string `json:"reply_to_message_id,omitempty"` - SkipPlaceholder bool `json:"skip_placeholder,omitempty"` // Tells Manager not to use Thinking - TriggerPlaceholder bool `json:"trigger_placeholder,omitempty"` + Channel string `json:"channel"` + ChatID string `json:"chat_id"` + Content string `json:"content"` + ReplyToMessageID string `json:"reply_to_message_id,omitempty"` } // MediaPart describes a single media attachment to send. diff --git a/pkg/channels/base.go b/pkg/channels/base.go index 334dc92540..edb5b6f08a 100644 --- a/pkg/channels/base.go +++ b/pkg/channels/base.go @@ -5,6 +5,7 @@ import ( "crypto/rand" "encoding/binary" "encoding/hex" + "regexp" "strconv" "strings" "sync/atomic" @@ -32,6 +33,9 @@ func init() { uniqueIDPrefix = hex.EncodeToString(b[:]) } +// audioAnnotationRe matches audio/voice annotations injected by channels (e.g. [voice], [audio: file.ogg]). +var audioAnnotationRe = regexp.MustCompile(`\[(voice|audio)(?::[^\]]*)?\]`) + // uniqueID generates a process-unique ID using a random prefix and an atomic counter. // This ID is intended for internal correlation (e.g. media scope keys) and is NOT // cryptographically secure — it must not be used in contexts where unpredictability matters. @@ -284,6 +288,17 @@ func (c *BaseChannel) HandleMessage( c.placeholderRecorder.RecordReactionUndo(c.name, chatID, undo) } } + // Placeholder — independent pipeline. + // Skip when the message contains audio: the agent will send the + // placeholder after transcription completes, so the user sees + // "Thinking…" only once the voice has been processed. + if !audioAnnotationRe.MatchString(content) { + if pc, ok := c.owner.(PlaceholderCapable); ok { + if phID, err := pc.SendPlaceholder(ctx, chatID); err == nil && phID != "" { + c.placeholderRecorder.RecordPlaceholder(c.name, chatID, phID) + } + } + } } if err := c.bus.PublishInbound(ctx, msg); err != nil { diff --git a/pkg/channels/manager.go b/pkg/channels/manager.go index d3ed02919a..9e5ceeca19 100644 --- a/pkg/channels/manager.go +++ b/pkg/channels/manager.go @@ -100,6 +100,27 @@ func (m *Manager) RecordPlaceholder(channel, chatID, placeholderID string) { m.placeholders.Store(key, placeholderEntry{id: placeholderID, createdAt: time.Now()}) } +// SendPlaceholder sends a "Thinking…" placeholder for the given channel/chatID +// and records it for later editing. Returns true if a placeholder was sent. +func (m *Manager) SendPlaceholder(ctx context.Context, channel, chatID string) bool { + m.mu.RLock() + ch, ok := m.channels[channel] + m.mu.RUnlock() + if !ok { + return false + } + pc, ok := ch.(PlaceholderCapable) + if !ok { + return false + } + phID, err := pc.SendPlaceholder(ctx, chatID) + if err != nil || phID == "" { + return false + } + m.RecordPlaceholder(channel, chatID, phID) + return true +} + // RecordTypingStop registers a typing stop function for later invocation. // Implements PlaceholderRecorder. func (m *Manager) RecordTypingStop(channel, chatID string, stop func()) { @@ -134,15 +155,13 @@ func (m *Manager) preSend(ctx context.Context, name string, msg bus.OutboundMess } // 3. Try editing placeholder - if !msg.SkipPlaceholder { - if v, loaded := m.placeholders.LoadAndDelete(key); loaded { - if entry, ok := v.(placeholderEntry); ok && entry.id != "" { - if editor, ok := ch.(MessageEditor); ok { - if err := editor.EditMessage(ctx, msg.ChatID, entry.id, msg.Content); err == nil { - return true // edited successfully, skip Send - } - // edit failed → fall through to normal Send + if v, loaded := m.placeholders.LoadAndDelete(key); loaded { + if entry, ok := v.(placeholderEntry); ok && entry.id != "" { + if editor, ok := ch.(MessageEditor); ok { + if err := editor.EditMessage(ctx, msg.ChatID, entry.id, msg.Content); err == nil { + return true // edited successfully, skip Send } + // edit failed → fall through to normal Send } } } @@ -500,15 +519,6 @@ func (m *Manager) sendWithRetry(ctx context.Context, name string, w *channelWork return } - if msg.TriggerPlaceholder { - if pc, ok := w.ch.(PlaceholderCapable); ok { - if phID, err := pc.SendPlaceholder(ctx, msg.ChatID); err == nil && phID != "" { - m.RecordPlaceholder(name, msg.ChatID, phID) - } - } - return - } - // Pre-send: stop typing and try to edit placeholder if m.preSend(ctx, name, msg, w.ch) { return // placeholder was edited successfully, skip Send diff --git a/pkg/channels/manager_test.go b/pkg/channels/manager_test.go index 223c4f4de8..9199285f72 100644 --- a/pkg/channels/manager_test.go +++ b/pkg/channels/manager_test.go @@ -877,7 +877,7 @@ func TestBuildMediaScope_WithMessageID(t *testing.T) { } } -func TestManager_PlaceholderLogic(t *testing.T) { +func TestManager_PlaceholderConsumedByResponse(t *testing.T) { mgr := &Manager{ channels: make(map[string]Channel), workers: make(map[string]*channelWorker), @@ -894,50 +894,37 @@ func TestManager_PlaceholderLogic(t *testing.T) { mgr.workers["mock"] = worker ctx := context.Background() + key := "mock:chat-1" - // Scenario 1: TriggerPlaceholder creates a placeholder but does NOT send text messages - msgTrigger := bus.OutboundMessage{ - Channel: "mock", - ChatID: "chat-1", - TriggerPlaceholder: true, - } - mgr.sendWithRetry(ctx, "mock", worker, msgTrigger) - - if mockCh.placeholdersSent != 1 { - t.Errorf("expected 1 placeholder sent, got %d", mockCh.placeholdersSent) - } - if len(mockCh.sentMessages) != 0 { - t.Errorf("expected 0 normal messages sent, got %d", len(mockCh.sentMessages)) - } + // Simulate a placeholder recorded by base.go HandleMessage + mgr.RecordPlaceholder("mock", "chat-1", "ph-123") - // Verify that the placeholder has been registered in the manager - key := "mock:chat-1" if _, ok := mgr.placeholders.Load(key); !ok { - t.Errorf("expected placeholder to be recorded in manager") + t.Fatal("expected placeholder to be recorded") } - // Scenario 2: SkipPlaceholder (simulates transcription). Must send normally, ignoring Edit. - msgSkip := bus.OutboundMessage{ - Channel: "mock", - ChatID: "chat-1", - Content: "Transcript: hello", - SkipPlaceholder: true, + // Transcription feedback arrives first — it should consume the placeholder + // and be delivered via EditMessage, not Send. + msgTranscript := bus.OutboundMessage{ + Channel: "mock", + ChatID: "chat-1", + Content: "Transcript: hello", } - mgr.sendWithRetry(ctx, "mock", worker, msgSkip) + mgr.sendWithRetry(ctx, "mock", worker, msgTranscript) - if mockCh.editedMessages != 0 { - t.Errorf("expected 0 edited messages due to SkipPlaceholder, got %d", mockCh.editedMessages) + if mockCh.editedMessages != 1 { + t.Errorf("expected 1 edited message (placeholder consumed by transcript), got %d", mockCh.editedMessages) } - if len(mockCh.sentMessages) != 1 { - t.Errorf("expected 1 normal message sent, got %d", len(mockCh.sentMessages)) + if len(mockCh.sentMessages) != 0 { + t.Errorf("expected 0 normal messages (transcript used edit), got %d", len(mockCh.sentMessages)) } - // The placeholder must still exist for the next response - if _, ok := mgr.placeholders.Load(key); !ok { - t.Errorf("expected placeholder to STILL be in manager after SkipPlaceholder") + // Placeholder should be gone now + if _, ok := mgr.placeholders.Load(key); ok { + t.Error("expected placeholder to be removed after being consumed") } - // Scenario 3: Normal Message (simulates the final LLM response). Must consume the placeholder. + // Final LLM response arrives — no placeholder left, so it goes through Send msgFinal := bus.OutboundMessage{ Channel: "mock", ChatID: "chat-1", @@ -945,11 +932,44 @@ func TestManager_PlaceholderLogic(t *testing.T) { } mgr.sendWithRetry(ctx, "mock", worker, msgFinal) - if mockCh.editedMessages != 1 { - t.Errorf("expected 1 edited message (consuming placeholder), got %d", mockCh.editedMessages) + if len(mockCh.sentMessages) != 1 { + t.Errorf("expected 1 normal message sent, got %d", len(mockCh.sentMessages)) } - // The placeholder must have been removed - if _, ok := mgr.placeholders.Load(key); ok { - t.Errorf("expected placeholder to be removed after being consumed") +} + +func TestManager_SendPlaceholder(t *testing.T) { + mgr := &Manager{ + channels: make(map[string]Channel), + workers: make(map[string]*channelWorker), + placeholders: sync.Map{}, + } + + mockCh := &mockChannel{ + sendFn: func(ctx context.Context, msg bus.OutboundMessage) error { + return nil + }, + } + mgr.channels["mock"] = mockCh + + ctx := context.Background() + + // SendPlaceholder should send a placeholder and record it + ok := mgr.SendPlaceholder(ctx, "mock", "chat-1") + if !ok { + t.Fatal("expected SendPlaceholder to succeed") + } + if mockCh.placeholdersSent != 1 { + t.Errorf("expected 1 placeholder sent, got %d", mockCh.placeholdersSent) + } + + key := "mock:chat-1" + if _, loaded := mgr.placeholders.Load(key); !loaded { + t.Error("expected placeholder to be recorded in manager") + } + + // SendPlaceholder on unknown channel should return false + ok = mgr.SendPlaceholder(ctx, "unknown", "chat-1") + if ok { + t.Error("expected SendPlaceholder to fail for unknown channel") } } From f89c9673cbe0f14c42a420f2924ef9ad4ded2e46 Mon Sep 17 00:00:00 2001 From: afjcjsbx Date: Mon, 9 Mar 2026 11:38:23 +0100 Subject: [PATCH 12/13] sync sendmessage function --- pkg/agent/loop.go | 16 +-- pkg/channels/manager.go | 33 +++++++ pkg/channels/manager_test.go | 186 +++++++++++++++++++++++++++++++++++ 3 files changed, 223 insertions(+), 12 deletions(-) diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index d98191f333..a6a41a2ab5 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -466,9 +466,9 @@ func (al *AgentLoop) transcribeAudioInMessage(ctx context.Context, msg bus.Inbou } // sendTranscriptionFeedback sends feedback to the user with the result of -// audio transcription if the option is enabled. It sends the message directly -// through the channel (bypassing the bus queue) so that ordering with the -// subsequent placeholder is guaranteed. +// audio transcription if the option is enabled. It uses Manager.SendMessage +// which executes synchronously (rate limiting, splitting, retry) so that +// ordering with the subsequent placeholder is guaranteed. func (al *AgentLoop) sendTranscriptionFeedback( ctx context.Context, channel, chatID, messageID string, @@ -495,15 +495,7 @@ func (al *AgentLoop) sendTranscriptionFeedback( feedbackMsg = "No voice detected in the audio" } - ch, ok := al.channelManager.GetChannel(channel) - if !ok { - return - } - - sendCtx, cancel := context.WithTimeout(ctx, 5*time.Second) - defer cancel() - - err := ch.Send(sendCtx, bus.OutboundMessage{ + err := al.channelManager.SendMessage(ctx, bus.OutboundMessage{ Channel: channel, ChatID: chatID, Content: feedbackMsg, diff --git a/pkg/channels/manager.go b/pkg/channels/manager.go index 9e5ceeca19..2f646a0770 100644 --- a/pkg/channels/manager.go +++ b/pkg/channels/manager.go @@ -825,6 +825,39 @@ func (m *Manager) UnregisterChannel(name string) { delete(m.channels, name) } +// SendMessage sends an outbound message synchronously through the channel +// worker's rate limiter and retry logic. It blocks until the message is +// delivered (or all retries are exhausted), which preserves ordering when +// a subsequent operation depends on the message having been sent. +func (m *Manager) SendMessage(ctx context.Context, msg bus.OutboundMessage) error { + m.mu.RLock() + _, exists := m.channels[msg.Channel] + w, wExists := m.workers[msg.Channel] + m.mu.RUnlock() + + if !exists { + return fmt.Errorf("channel %s not found", msg.Channel) + } + if !wExists || w == nil { + return fmt.Errorf("channel %s has no active worker", msg.Channel) + } + + maxLen := 0 + if mlp, ok := w.ch.(MessageLengthProvider); ok { + maxLen = mlp.MaxMessageLength() + } + if maxLen > 0 && len([]rune(msg.Content)) > maxLen { + for _, chunk := range SplitMessage(msg.Content, maxLen) { + chunkMsg := msg + chunkMsg.Content = chunk + m.sendWithRetry(ctx, msg.Channel, w, chunkMsg) + } + } else { + m.sendWithRetry(ctx, msg.Channel, w, msg) + } + return nil +} + func (m *Manager) SendToChannel(ctx context.Context, channelName, chatID, content string) error { m.mu.RLock() _, exists := m.channels[channelName] diff --git a/pkg/channels/manager_test.go b/pkg/channels/manager_test.go index 9199285f72..1f3a628c29 100644 --- a/pkg/channels/manager_test.go +++ b/pkg/channels/manager_test.go @@ -937,6 +937,192 @@ func TestManager_PlaceholderConsumedByResponse(t *testing.T) { } } +func TestSendMessage_Synchronous(t *testing.T) { + m := newTestManager() + + var received []bus.OutboundMessage + ch := &mockChannel{ + sendFn: func(_ context.Context, msg bus.OutboundMessage) error { + received = append(received, msg) + return nil + }, + } + + w := &channelWorker{ + ch: ch, + limiter: rate.NewLimiter(rate.Inf, 1), + } + m.channels["test"] = ch + m.workers["test"] = w + + msg := bus.OutboundMessage{ + Channel: "test", + ChatID: "123", + Content: "hello world", + ReplyToMessageID: "msg-456", + } + + err := m.SendMessage(context.Background(), msg) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + // SendMessage is synchronous — message should already be delivered + if len(received) != 1 { + t.Fatalf("expected 1 message sent, got %d", len(received)) + } + if received[0].ReplyToMessageID != "msg-456" { + t.Fatalf("expected ReplyToMessageID msg-456, got %s", received[0].ReplyToMessageID) + } + if received[0].Content != "hello world" { + t.Fatalf("expected content 'hello world', got %s", received[0].Content) + } +} + +func TestSendMessage_UnknownChannel(t *testing.T) { + m := newTestManager() + + msg := bus.OutboundMessage{ + Channel: "nonexistent", + ChatID: "123", + Content: "hello", + } + + err := m.SendMessage(context.Background(), msg) + if err == nil { + t.Fatal("expected error for unknown channel") + } +} + +func TestSendMessage_NoWorker(t *testing.T) { + m := newTestManager() + + ch := &mockChannel{ + sendFn: func(_ context.Context, _ bus.OutboundMessage) error { return nil }, + } + m.channels["test"] = ch + // No worker registered + + msg := bus.OutboundMessage{ + Channel: "test", + ChatID: "123", + Content: "hello", + } + + err := m.SendMessage(context.Background(), msg) + if err == nil { + t.Fatal("expected error when no worker exists") + } +} + +func TestSendMessage_WithRetry(t *testing.T) { + m := newTestManager() + + var callCount int + ch := &mockChannel{ + sendFn: func(_ context.Context, _ bus.OutboundMessage) error { + callCount++ + if callCount == 1 { + return fmt.Errorf("transient: %w", ErrTemporary) + } + return nil + }, + } + + w := &channelWorker{ + ch: ch, + limiter: rate.NewLimiter(rate.Inf, 1), + } + m.channels["test"] = ch + m.workers["test"] = w + + msg := bus.OutboundMessage{ + Channel: "test", + ChatID: "123", + Content: "retry me", + } + + err := m.SendMessage(context.Background(), msg) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + if callCount != 2 { + t.Fatalf("expected 2 Send calls (1 failure + 1 success), got %d", callCount) + } +} + +func TestSendMessage_WithSplitting(t *testing.T) { + m := newTestManager() + + var received []string + ch := &mockChannelWithLength{ + mockChannel: mockChannel{ + sendFn: func(_ context.Context, msg bus.OutboundMessage) error { + received = append(received, msg.Content) + return nil + }, + }, + maxLen: 5, + } + + w := &channelWorker{ + ch: ch, + limiter: rate.NewLimiter(rate.Inf, 1), + } + m.channels["test"] = ch + m.workers["test"] = w + + msg := bus.OutboundMessage{ + Channel: "test", + ChatID: "123", + Content: "hello world", + } + + err := m.SendMessage(context.Background(), msg) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + if len(received) < 2 { + t.Fatalf("expected message to be split into at least 2 chunks, got %d", len(received)) + } +} + +func TestSendMessage_PreservesOrdering(t *testing.T) { + m := newTestManager() + + var order []string + ch := &mockChannel{ + sendFn: func(_ context.Context, msg bus.OutboundMessage) error { + order = append(order, msg.Content) + return nil + }, + } + + w := &channelWorker{ + ch: ch, + limiter: rate.NewLimiter(rate.Inf, 1), + } + m.channels["test"] = ch + m.workers["test"] = w + + // Send two messages sequentially — they must arrive in order + _ = m.SendMessage(context.Background(), bus.OutboundMessage{ + Channel: "test", ChatID: "1", Content: "first", + }) + _ = m.SendMessage(context.Background(), bus.OutboundMessage{ + Channel: "test", ChatID: "1", Content: "second", + }) + + if len(order) != 2 { + t.Fatalf("expected 2 messages, got %d", len(order)) + } + if order[0] != "first" || order[1] != "second" { + t.Fatalf("expected [first, second], got %v", order) + } +} + func TestManager_SendPlaceholder(t *testing.T) { mgr := &Manager{ channels: make(map[string]Channel), From 08cc09e09145b665cdda9dbc738cf5742471eeba Mon Sep 17 00:00:00 2001 From: afjcjsbx Date: Wed, 11 Mar 2026 00:17:10 +0100 Subject: [PATCH 13/13] resolve conflicts --- pkg/channels/telegram/telegram.go | 2 +- pkg/config/config.go | 5 +++++ pkg/config/defaults.go | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pkg/channels/telegram/telegram.go b/pkg/channels/telegram/telegram.go index a0962773e0..4a8d34a9fe 100644 --- a/pkg/channels/telegram/telegram.go +++ b/pkg/channels/telegram/telegram.go @@ -214,7 +214,7 @@ func (c *TelegramChannel) Send(ctx context.Context, msg bus.OutboundMessage) err // sendHTMLChunk sends a single HTML message, falling back to the original // markdown as plain text on parse failure so users never see raw HTML tags. func (c *TelegramChannel) sendHTMLChunk( - ctx context.Context, chatID int64, threadID int, htmlContent, mdFallback string, replyToID string + ctx context.Context, chatID int64, threadID int, htmlContent, mdFallback string, replyToID string, ) error { tgMsg := tu.Message(tu.ID(chatID), htmlContent) tgMsg.ParseMode = telego.ModeHTML diff --git a/pkg/config/config.go b/pkg/config/config.go index 13d5a7306a..7a806c1e1d 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -59,6 +59,7 @@ type Config struct { Tools ToolsConfig `json:"tools"` Heartbeat HeartbeatConfig `json:"heartbeat"` Devices DevicesConfig `json:"devices"` + Voice VoiceConfig `json:"voice"` // BuildInfo contains build-time version information BuildInfo BuildInfo `json:"build_info,omitempty"` } @@ -472,6 +473,10 @@ type DevicesConfig struct { MonitorUSB bool `json:"monitor_usb" env:"PICOCLAW_DEVICES_MONITOR_USB"` } +type VoiceConfig struct { + EchoTranscription bool `json:"echo_transcription" env:"PICOCLAW_VOICE_ECHO_TRANSCRIPTION"` +} + type ProvidersConfig struct { Anthropic ProviderConfig `json:"anthropic"` OpenAI OpenAIProviderConfig `json:"openai"` diff --git a/pkg/config/defaults.go b/pkg/config/defaults.go index 5bb3bd1d69..3b1bb1aefc 100644 --- a/pkg/config/defaults.go +++ b/pkg/config/defaults.go @@ -510,6 +510,9 @@ func DefaultConfig() *Config { Enabled: false, MonitorUSB: true, }, + Voice: VoiceConfig{ + EchoTranscription: false, + }, BuildInfo: BuildInfo{ Version: Version, GitCommit: GitCommit,