Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 3 additions & 15 deletions pkg/channels/wecom/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@ type WeComAppChannel struct {
tokenMu sync.RWMutex
ctx context.Context
cancel context.CancelFunc
processedMsgs map[string]bool // Message deduplication: msg_id -> processed
msgMu sync.RWMutex
processedMsgs *MessageDeduplicator
}

// WeComXMLMessage represents the XML message structure from WeCom
Expand Down Expand Up @@ -144,7 +143,7 @@ func NewWeComAppChannel(cfg config.WeComAppConfig, messageBus *bus.MessageBus) (
client: &http.Client{Timeout: clientTimeout},
ctx: ctx,
cancel: cancel,
processedMsgs: make(map[string]bool),
processedMsgs: NewMessageDeduplicator(wecomMaxProcessedMessages),
}, nil
}

Expand Down Expand Up @@ -607,23 +606,12 @@ func (c *WeComAppChannel) processMessage(ctx context.Context, msg WeComXMLMessag
// Message deduplication: Use msg_id to prevent duplicate processing
// As per WeCom documentation, use msg_id for deduplication
msgID := fmt.Sprintf("%d", msg.MsgId)
c.msgMu.Lock()
if c.processedMsgs[msgID] {
c.msgMu.Unlock()
if !c.processedMsgs.MarkMessageProcessed(msgID) {
logger.DebugCF("wecom_app", "Skipping duplicate message", map[string]any{
"msg_id": msgID,
})
Comment on lines 606 to 612
Copy link

Copilot AI Mar 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

msg.MsgId is stringified and used as the dedupe key without checking for the zero value. If the inbound XML ever omits <MsgId>, it will default to 0 and unrelated messages could be incorrectly treated as duplicates. Consider validating msg.MsgId != 0 (or using a fallback key) before deduping.

Copilot uses AI. Check for mistakes.
return
}
c.processedMsgs[msgID] = true
// Clean up old messages while still holding the lock to avoid a data race
// on len(). Reset the map but re-insert the current msgID so it remains
// deduplicated.
if len(c.processedMsgs) > 1000 {
c.processedMsgs = make(map[string]bool)
c.processedMsgs[msgID] = true
}
c.msgMu.Unlock()

senderID := msg.FromUserName
chatID := senderID // WeCom App uses user ID as chat ID for direct messages
Expand Down
19 changes: 3 additions & 16 deletions pkg/channels/wecom/bot.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"io"
"net/http"
"strings"
"sync"
"time"

"github.com/sipeed/picoclaw/pkg/bus"
Expand All @@ -28,8 +27,7 @@ type WeComBotChannel struct {
client *http.Client
ctx context.Context
cancel context.CancelFunc
processedMsgs map[string]bool // Message deduplication: msg_id -> processed
msgMu sync.RWMutex
processedMsgs *MessageDeduplicator
}

// WeComBotMessage represents the JSON message structure from WeCom Bot (AIBOT)
Expand Down Expand Up @@ -108,7 +106,7 @@ func NewWeComBotChannel(cfg config.WeComConfig, messageBus *bus.MessageBus) (*We
client: &http.Client{Timeout: clientTimeout},
ctx: ctx,
cancel: cancel,
processedMsgs: make(map[string]bool),
processedMsgs: NewMessageDeduplicator(wecomMaxProcessedMessages),
}, nil
}

Expand Down Expand Up @@ -330,23 +328,12 @@ func (c *WeComBotChannel) processMessage(ctx context.Context, msg WeComBotMessag

// Message deduplication: Use msg_id to prevent duplicate processing
msgID := msg.MsgID
Copy link

Copilot AI Mar 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

msg.MsgID is used as the dedupe key without validation. If the inbound JSON is missing msgid (or it is an empty string), all such messages will collapse to the same key and be incorrectly dropped as duplicates; with the new ring implementation, an empty string key also interferes with eviction. Consider explicitly rejecting/handling empty msg IDs before calling the deduplicator (e.g., log + skip, or derive a fallback ID).

Suggested change
msgID := msg.MsgID
msgID := msg.MsgID
if msgID == "" {
// Derive a fallback ID to avoid using an empty string as dedupe key
fallbackID := fmt.Sprintf("fallback:%s:%s:%s", msg.From.UserID, msg.ChatID, msg.MsgType)
logger.DebugCF("wecom", "Empty msg_id received, using fallback dedupe key", map[string]any{
"derived_msg_id": fallbackID,
"from_user": msg.From.UserID,
"chat_id": msg.ChatID,
"msg_type": msg.MsgType,
})
msgID = fallbackID
}

Copilot uses AI. Check for mistakes.
c.msgMu.Lock()
if c.processedMsgs[msgID] {
c.msgMu.Unlock()
if !c.processedMsgs.MarkMessageProcessed(msgID) {
logger.DebugCF("wecom", "Skipping duplicate message", map[string]any{
"msg_id": msgID,
})
return
}
c.processedMsgs[msgID] = true
// Clean up old messages while still holding the lock to avoid a data race
// on len(). Reset the map but re-insert the current msgID so it remains
// deduplicated.
if len(c.processedMsgs) > 1000 {
c.processedMsgs = make(map[string]bool)
c.processedMsgs[msgID] = true
}
c.msgMu.Unlock()

senderID := msg.From.UserID

Expand Down
54 changes: 54 additions & 0 deletions pkg/channels/wecom/dedupe.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package wecom

import "sync"
Comment on lines +1 to +3
Copy link

Copilot AI Mar 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR description mentions files like pkg/channels/wecom_dedupe.go and pkg/channels/wecom.go, but the actual changes add pkg/channels/wecom/dedupe.go and update pkg/channels/wecom/{bot.go,app.go}. Please update the description to match the real file paths so reviewers can follow along more easily.

Copilot uses AI. Check for mistakes.

// wecomMaxProcessedMessages is the number of message IDs a deduplicator
// remembers by default. NewMessageDeduplicator falls back to this value when
// it is given a non-positive capacity.
const wecomMaxProcessedMessages = 1000

// MessageDeduplicator remembers a bounded number of recently seen message IDs
// so that repeated deliveries of the same message can be detected.
//
// Membership is answered from a map, while a fixed-size ring of the same IDs
// records insertion order. When the ring wraps around, the ID stored in the
// slot being reused is dropped from the map, so entries are forgotten one at a
// time, oldest first, instead of all at once.
type MessageDeduplicator struct {
	// mu serializes access to every field below.
	mu sync.Mutex

	// msgs is the set of IDs currently remembered (value is always true).
	msgs map[string]bool

	// ring holds the remembered IDs in insertion order, reused cyclically.
	ring []string

	// idx is the ring slot the next new ID will be written to.
	idx int

	// max is the ring length, i.e. the maximum number of remembered IDs.
	max int
}

// NewMessageDeduplicator returns an empty deduplicator that remembers at most
// maxEntries message IDs. A maxEntries of zero or less selects the package
// default, wecomMaxProcessedMessages.
func NewMessageDeduplicator(maxEntries int) *MessageDeduplicator {
	capacity := maxEntries
	if capacity < 1 {
		capacity = wecomMaxProcessedMessages
	}

	// The ring is allocated at full length up front; the map is only given a
	// size hint so it does not have to grow while the ring fills.
	d := new(MessageDeduplicator)
	d.max = capacity
	d.ring = make([]string, capacity)
	d.msgs = make(map[string]bool, capacity)
	return d
}

// MarkMessageProcessed marks msgID as processed and returns false for duplicates.
func (d *MessageDeduplicator) MarkMessageProcessed(msgID string) bool {
d.mu.Lock()
defer d.mu.Unlock()

// 1. Check for duplicate
if d.msgs[msgID] {
return false
}

// 2. Evict the oldest message at our current ring position (if any)
oldestID := d.ring[d.idx]
if oldestID != "" {
delete(d.msgs, oldestID)
}
Comment on lines +40 to +44
Copy link

Copilot AI Mar 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ring buffer uses an empty string sentinel ("" check) to decide whether to delete an evicted entry. If a real msgID can ever be "", eviction won't delete it and the dedupe set can exceed the configured max and behave incorrectly. Consider rejecting empty msgIDs up front or tracking slot occupancy separately (e.g., size counter / filled flag) so "" can be a valid key.

Copilot uses AI. Check for mistakes.

// 3. Store the new message
d.msgs[msgID] = true
d.ring[d.idx] = msgID

// 4. Advance the circle queue index
Copy link

Copilot AI Mar 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo/wording in comment: "circle queue" should be "circular queue" or "ring buffer" for clarity/consistency with the rest of the file.

Suggested change
// 4. Advance the circle queue index
// 4. Advance the circular queue index

Copilot uses AI. Check for mistakes.
d.idx = (d.idx + 1) % d.max

return true
}
83 changes: 83 additions & 0 deletions pkg/channels/wecom/dedupe_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package wecom

import (
"sync"
"testing"
)

// TestMessageDeduplicator_DuplicateDetection checks the basic contract: an ID
// is accepted the first time it is marked and rejected the second time.
func TestMessageDeduplicator_DuplicateDetection(t *testing.T) {
	d := NewMessageDeduplicator(wecomMaxProcessedMessages)

	first := d.MarkMessageProcessed("msg-1")
	if !first {
		t.Fatal("first message should be accepted")
	}

	second := d.MarkMessageProcessed("msg-1")
	if second {
		t.Fatal("duplicate message should be rejected")
	}
}

// TestMessageDeduplicator_ConcurrentSameMessage marks the same ID from many
// goroutines at once and verifies that exactly one of them is told the
// message is new.
func TestMessageDeduplicator_ConcurrentSameMessage(t *testing.T) {
	d := NewMessageDeduplicator(wecomMaxProcessedMessages)

	const goroutines = 64

	// Each goroutine writes only to its own slot, so the slice needs no lock.
	accepted := make([]bool, goroutines)

	var wg sync.WaitGroup
	for i := 0; i < goroutines; i++ {
		wg.Add(1)
		go func(slot int) {
			defer wg.Done()
			accepted[slot] = d.MarkMessageProcessed("msg-concurrent")
		}(i)
	}
	wg.Wait()

	successes := 0
	for _, ok := range accepted {
		if ok {
			successes++
		}
	}

	if successes != 1 {
		t.Fatalf("expected exactly 1 successful mark, got %d", successes)
	}
}

// TestMessageDeduplicator_CircularQueueEviction fills a three-slot
// deduplicator and then keeps inserting, verifying that the oldest ID is the
// one forgotten each time and that the map never grows past capacity.
func TestMessageDeduplicator_CircularQueueEviction(t *testing.T) {
	// A tiny capacity makes the wrap-around easy to reason about.
	const capacity = 3
	d := NewMessageDeduplicator(capacity)

	// Fill every slot; msg-1 becomes the oldest entry.
	for _, id := range []string{"msg-1", "msg-2", "msg-3"} {
		d.MarkMessageProcessed(id)
	}
	if got := len(d.msgs); got != 3 {
		t.Fatalf("expected map size to be 3, got %d", got)
	}

	// A fourth ID reuses msg-1's slot, pushing msg-1 out.
	if !d.MarkMessageProcessed("msg-4") {
		t.Fatal("msg-4 should be accepted")
	}
	if got := len(d.msgs); got != 3 {
		t.Fatalf("expected map size to remain at max capacity (3), got %d", got)
	}

	// msg-1 is no longer remembered, so it is treated as new again.
	if !d.MarkMessageProcessed("msg-1") {
		t.Fatal("msg-1 should be accepted again because it was evicted")
	}

	// Re-adding msg-1 reused msg-2's slot, so msg-2 is new again as well.
	if !d.MarkMessageProcessed("msg-2") {
		t.Fatal("msg-2 should be accepted again because it was evicted")
	}
}
Loading