Skip to content

Commit c28b200

Browse files
Vast-Stars authored and renato0307 committed
fix(feishu): invalidate cached token on auth error to enable retry recovery (sipeed#1318)
The Lark SDK v3's built-in token retry loop does not clear stale tokens from its cache when the server returns error 99991663 (tenant_access_token invalid), causing all API calls to fail until the token naturally expires (~2 hours).

- Add a tokenCache struct (implementing larkcore.Cache) with Get/Set/InvalidateAll methods and proper expired-entry cleanup
- Wire the custom cache into lark.NewClient via WithTokenCache()
- Add an invalidateTokenOnAuthError helper, called in all API methods
1 parent 0bffe46 commit c28b200

2 files changed

Lines changed: 85 additions & 4 deletions

File tree

pkg/channels/feishu/feishu_64.go

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,17 @@ import (
2929
"github.com/sipeed/picoclaw/pkg/utils"
3030
)
3131

32+
// errCodeTenantTokenInvalid is the Feishu API error code for an expired/revoked
33+
// tenant_access_token. The Lark SDK's built-in retry does not clear its cache
34+
// on this error, so we do it ourselves: invalidateTokenOnAuthError compares
// API response codes against this value and flushes the token cache on a match.
35+
const errCodeTenantTokenInvalid = 99991663
36+
3237
type FeishuChannel struct {
3338
*channels.BaseChannel
34-
config config.FeishuConfig
35-
client *lark.Client
36-
wsClient *larkws.Client
39+
config config.FeishuConfig
40+
client *lark.Client
41+
wsClient *larkws.Client
42+
tokenCache *tokenCache // custom cache that supports invalidation
3743

3844
botOpenID atomic.Value // stores string; populated lazily for @mention detection
3945

@@ -47,10 +53,12 @@ func NewFeishuChannel(cfg config.FeishuConfig, bus *bus.MessageBus) (*FeishuChan
4753
channels.WithReasoningChannelID(cfg.ReasoningChannelID),
4854
)
4955

56+
tc := newTokenCache()
5057
ch := &FeishuChannel{
5158
BaseChannel: base,
5259
config: cfg,
53-
client: lark.NewClient(cfg.AppID, cfg.AppSecret),
60+
tokenCache: tc,
61+
client: lark.NewClient(cfg.AppID, cfg.AppSecret, lark.WithTokenCache(tc)),
5462
}
5563
ch.SetOwner(ch)
5664
return ch, nil
@@ -147,6 +155,7 @@ func (c *FeishuChannel) EditMessage(ctx context.Context, chatID, messageID, cont
147155
return fmt.Errorf("feishu edit: %w", err)
148156
}
149157
if !resp.Success() {
158+
c.invalidateTokenOnAuthError(resp.Code)
150159
return fmt.Errorf("feishu edit api error (code=%d msg=%s)", resp.Code, resp.Msg)
151160
}
152161
return nil
@@ -186,6 +195,7 @@ func (c *FeishuChannel) SendPlaceholder(ctx context.Context, chatID string) (str
186195
return "", fmt.Errorf("feishu placeholder send: %w", err)
187196
}
188197
if !resp.Success() {
198+
c.invalidateTokenOnAuthError(resp.Code)
189199
return "", fmt.Errorf("feishu placeholder api error (code=%d msg=%s)", resp.Code, resp.Msg)
190200
}
191201

@@ -226,6 +236,7 @@ func (c *FeishuChannel) ReactToMessage(ctx context.Context, chatID, messageID st
226236
return func() {}, fmt.Errorf("feishu react: %w", err)
227237
}
228238
if !resp.Success() {
239+
c.invalidateTokenOnAuthError(resp.Code)
229240
logger.ErrorCF("feishu", "Reaction API error", map[string]any{
230241
"emoji": chosenEmoji,
231242
"message_id": messageID,
@@ -451,6 +462,7 @@ func (c *FeishuChannel) fetchBotOpenID(ctx context.Context) error {
451462
return fmt.Errorf("bot info parse: %w", err)
452463
}
453464
if result.Code != 0 {
465+
c.invalidateTokenOnAuthError(result.Code)
454466
return fmt.Errorf("bot info api error (code=%d)", result.Code)
455467
}
456468
if result.Bot.OpenID == "" {
@@ -593,6 +605,7 @@ func (c *FeishuChannel) downloadResource(
593605
return ""
594606
}
595607
if !resp.Success() {
608+
c.invalidateTokenOnAuthError(resp.Code)
596609
logger.ErrorCF("feishu", "Resource download api error", map[string]any{
597610
"code": resp.Code,
598611
"msg": resp.Msg,
@@ -705,6 +718,7 @@ func (c *FeishuChannel) sendCard(ctx context.Context, chatID, cardContent string
705718
}
706719

707720
if !resp.Success() {
721+
c.invalidateTokenOnAuthError(resp.Code)
708722
return fmt.Errorf("feishu api error (code=%d msg=%s): %w", resp.Code, resp.Msg, channels.ErrTemporary)
709723
}
710724

@@ -730,6 +744,7 @@ func (c *FeishuChannel) sendImage(ctx context.Context, chatID string, file *os.F
730744
return fmt.Errorf("feishu image upload: %w", err)
731745
}
732746
if !uploadResp.Success() {
747+
c.invalidateTokenOnAuthError(uploadResp.Code)
733748
return fmt.Errorf("feishu image upload api error (code=%d msg=%s)", uploadResp.Code, uploadResp.Msg)
734749
}
735750
if uploadResp.Data == nil || uploadResp.Data.ImageKey == nil {
@@ -754,6 +769,7 @@ func (c *FeishuChannel) sendImage(ctx context.Context, chatID string, file *os.F
754769
return fmt.Errorf("feishu image send: %w", err)
755770
}
756771
if !resp.Success() {
772+
c.invalidateTokenOnAuthError(resp.Code)
757773
return fmt.Errorf("feishu image send api error (code=%d msg=%s)", resp.Code, resp.Msg)
758774
}
759775
return nil
@@ -784,6 +800,7 @@ func (c *FeishuChannel) sendFile(ctx context.Context, chatID string, file *os.Fi
784800
return fmt.Errorf("feishu file upload: %w", err)
785801
}
786802
if !uploadResp.Success() {
803+
c.invalidateTokenOnAuthError(uploadResp.Code)
787804
return fmt.Errorf("feishu file upload api error (code=%d msg=%s)", uploadResp.Code, uploadResp.Msg)
788805
}
789806
if uploadResp.Data == nil || uploadResp.Data.FileKey == nil {
@@ -808,6 +825,7 @@ func (c *FeishuChannel) sendFile(ctx context.Context, chatID string, file *os.Fi
808825
return fmt.Errorf("feishu file send: %w", err)
809826
}
810827
if !resp.Success() {
828+
c.invalidateTokenOnAuthError(resp.Code)
811829
return fmt.Errorf("feishu file send api error (code=%d msg=%s)", resp.Code, resp.Msg)
812830
}
813831
return nil
@@ -830,3 +848,14 @@ func extractFeishuSenderID(sender *larkim.EventSender) string {
830848

831849
return ""
832850
}
851+
852+
// invalidateTokenOnAuthError clears the cached tenant_access_token when the
853+
// Feishu API reports it as invalid (99991663), so the next request fetches a
854+
// fresh one. The Lark SDK's built-in retry does not clear the cache, causing
855+
// all API calls to fail until the token naturally expires (~2 hours).
856+
func (c *FeishuChannel) invalidateTokenOnAuthError(code int) {
857+
if code == errCodeTenantTokenInvalid {
858+
c.tokenCache.InvalidateAll()
859+
logger.WarnCF("feishu", "Invalidated cached token due to auth error", nil)
860+
}
861+
}

pkg/channels/feishu/token_cache.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package feishu
2+
3+
import (
4+
"context"
5+
"sync"
6+
"time"
7+
)
8+
9+
// tokenCache implements larkcore.Cache with an extra InvalidateAll method.
// This works around a bug in the Lark SDK v3 where the built-in token retry
// loop does not clear stale tokens from cache on auth errors.
//
// The zero value is an empty, ready-to-use cache. Every method takes mu, so a
// single tokenCache may be shared between goroutines; it must not be copied
// after first use.
type tokenCache struct {
	// mu guards store. Get may delete expired entries, so every operation
	// needs exclusive access and a plain Mutex is sufficient.
	mu    sync.Mutex
	store map[string]tokenEntry
}

// tokenEntry is a cached value together with the instant at which it stops
// being valid.
type tokenEntry struct {
	value    string
	expireAt time.Time
}

// newTokenCache returns an empty tokenCache.
func newTokenCache() *tokenCache {
	return &tokenCache{store: make(map[string]tokenEntry)}
}

// Set stores value under key for the duration ttl, replacing any previous
// entry. A non-positive ttl means the value is already expired, so the key is
// removed instead of storing an entry that could never be read. The context is
// unused and the returned error is always nil.
func (c *tokenCache) Set(_ context.Context, key, value string, ttl time.Duration) error {
	c.mu.Lock()
	defer c.mu.Unlock()

	if ttl <= 0 {
		delete(c.store, key) // no-op on a nil map
		return nil
	}
	if c.store == nil {
		// Lazily allocate so a zero-value tokenCache does not panic on write.
		c.store = make(map[string]tokenEntry)
	}
	c.store[key] = tokenEntry{value: value, expireAt: time.Now().Add(ttl)}
	return nil
}

// Get returns the value cached under key, or "" when the key is absent or its
// entry has expired. An entry counts as expired as soon as its deadline is
// reached; expired entries are deleted on access. The context is unused and
// the returned error is always nil.
func (c *tokenCache) Get(_ context.Context, key string) (string, error) {
	c.mu.Lock()
	defer c.mu.Unlock()

	e, ok := c.store[key]
	if !ok {
		return "", nil
	}
	if !e.expireAt.After(time.Now()) {
		delete(c.store, key)
		return "", nil
	}
	return e.value, nil
}

// InvalidateAll removes all cached tokens, forcing fresh acquisition.
// Calling it on a nil *tokenCache is a no-op, so error-handling paths can
// invoke it unconditionally without risking a panic.
func (c *tokenCache) InvalidateAll() {
	if c == nil {
		return
	}
	c.mu.Lock()
	defer c.mu.Unlock()
	clear(c.store)
}

0 commit comments

Comments
 (0)