Skip to content

Commit e53196f

Browse files
fix: make webhook payload handlers recover from panics (cherry-pick #24862 for 3.1) (#24914)
Signed-off-by: Jakub Ciolek <[email protected]> Co-authored-by: Jakub Ciolek <[email protected]>
1 parent 16ba5f9 commit e53196f

File tree

4 files changed

+123
-2
lines changed

4 files changed

+123
-2
lines changed

applicationset/webhook/webhook.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,14 @@ import (
2626
"github.com/go-playground/webhooks/v6/github"
2727
"github.com/go-playground/webhooks/v6/gitlab"
2828
log "github.com/sirupsen/logrus"
29+
30+
"github.com/argoproj/argo-cd/v3/util/guard"
2931
)
3032

3133
const payloadQueueSize = 50000
3234

35+
const panicMsgAppSet = "panic while processing applicationset-controller webhook event"
36+
3337
type WebhookHandler struct {
3438
sync.WaitGroup // for testing
3539
github *github.Webhook
@@ -102,6 +106,7 @@ func NewWebhookHandler(webhookParallelism int, argocdSettingsMgr *argosettings.S
102106
}
103107

104108
func (h *WebhookHandler) startWorkerPool(webhookParallelism int) {
109+
compLog := log.WithField("component", "applicationset-webhook")
105110
for i := 0; i < webhookParallelism; i++ {
106111
h.Add(1)
107112
go func() {
@@ -111,7 +116,7 @@ func (h *WebhookHandler) startWorkerPool(webhookParallelism int) {
111116
if !ok {
112117
return
113118
}
114-
h.HandleEvent(payload)
119+
guard.RecoverAndLog(func() { h.HandleEvent(payload) }, compLog, panicMsgAppSet)
115120
}
116121
}()
117122
}

util/guard/guard.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package guard
2+
3+
import (
4+
"runtime/debug"
5+
)
6+
7+
// Minimal logger contract; avoids depending on any specific logging package.
8+
type Logger interface{ Errorf(string, ...any) }
9+
10+
// Run executes fn and recovers a panic, logging a component-specific message.
11+
func RecoverAndLog(fn func(), log Logger, msg string) {
12+
defer func() {
13+
if r := recover(); r != nil {
14+
if log != nil {
15+
log.Errorf("%s: %v %s", msg, r, debug.Stack())
16+
}
17+
}
18+
}()
19+
fn()
20+
}

util/guard/guard_test.go

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
package guard
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
"sync"
7+
"testing"
8+
)
9+
10+
type nop struct{}
11+
12+
func (nop) Errorf(string, ...any) {}
13+
14+
// recorder is a thread-safe logger that captures formatted messages.
15+
type recorder struct {
16+
mu sync.Mutex
17+
calls int
18+
msgs []string
19+
}
20+
21+
func (r *recorder) Errorf(format string, args ...any) {
22+
r.mu.Lock()
23+
defer r.mu.Unlock()
24+
r.calls++
25+
r.msgs = append(r.msgs, fmt.Sprintf(format, args...))
26+
}
27+
28+
func TestRun_Recovers(_ *testing.T) {
29+
RecoverAndLog(func() { panic("boom") }, nop{}, "msg") // fails if panic escapes
30+
}
31+
32+
func TestRun_AllowsNextCall(t *testing.T) {
33+
ran := false
34+
RecoverAndLog(func() { panic("boom") }, nop{}, "msg")
35+
RecoverAndLog(func() { ran = true }, nop{}, "msg")
36+
if !ran {
37+
t.Fatal("expected second callback to run")
38+
}
39+
}
40+
41+
func TestRun_LogsMessageAndStack(t *testing.T) {
42+
r := &recorder{}
43+
RecoverAndLog(func() { panic("boom") }, r, "msg")
44+
if r.calls != 1 {
45+
t.Fatalf("expected 1 log call, got %d", r.calls)
46+
}
47+
got := strings.Join(r.msgs, "\n")
48+
if !strings.Contains(got, "msg") {
49+
t.Errorf("expected log to contain message %q; got %q", "msg", got)
50+
}
51+
if !strings.Contains(got, "boom") {
52+
t.Errorf("expected log to contain panic value %q; got %q", "boom", got)
53+
}
54+
// Heuristic check that a stack trace was included.
55+
if !strings.Contains(got, "guard.go") && !strings.Contains(got, "runtime/panic.go") && !strings.Contains(got, "goroutine") {
56+
t.Errorf("expected log to contain a stack trace; got %q", got)
57+
}
58+
}
59+
60+
func TestRun_NilLoggerDoesNotPanic(_ *testing.T) {
61+
var l Logger // nil
62+
RecoverAndLog(func() { panic("boom") }, l, "ignored")
63+
}
64+
65+
func TestRun_NoPanicDoesNotLog(t *testing.T) {
66+
r := &recorder{}
67+
ran := false
68+
RecoverAndLog(func() { ran = true }, r, "msg")
69+
if !ran {
70+
t.Fatal("expected fn to run")
71+
}
72+
if r.calls != 0 {
73+
t.Fatalf("expected 0 log calls, got %d", r.calls)
74+
}
75+
}
76+
77+
func TestRun_ConcurrentPanicsLogged(t *testing.T) {
78+
r := &recorder{}
79+
const n = 10
80+
var wg sync.WaitGroup
81+
wg.Add(n)
82+
for i := 0; i < n; i++ {
83+
go func(i int) {
84+
defer wg.Done()
85+
RecoverAndLog(func() { panic(fmt.Sprintf("boom-%d", i)) }, r, "msg")
86+
}(i)
87+
}
88+
wg.Wait()
89+
if r.calls != n {
90+
t.Fatalf("expected %d log calls, got %d", n, r.calls)
91+
}
92+
}

util/webhook/webhook.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ import (
3737
"github.com/argoproj/argo-cd/v3/util/db"
3838
"github.com/argoproj/argo-cd/v3/util/git"
3939
"github.com/argoproj/argo-cd/v3/util/glob"
40+
"github.com/argoproj/argo-cd/v3/util/guard"
4041
"github.com/argoproj/argo-cd/v3/util/settings"
4142
)
4243

@@ -52,6 +53,8 @@ const usernameRegex = `[\w\.][\w\.-]{0,30}[\w\.\$-]?`
5253

5354
const payloadQueueSize = 50000
5455

56+
const panicMsgServer = "panic while processing api-server webhook event"
57+
5558
var _ settingsSource = &settings.SettingsManager{}
5659

5760
type ArgoCDWebhookHandler struct {
@@ -127,6 +130,7 @@ func NewHandler(namespace string, applicationNamespaces []string, webhookParalle
127130
}
128131

129132
func (a *ArgoCDWebhookHandler) startWorkerPool(webhookParallelism int) {
133+
compLog := log.WithField("component", "api-server-webhook")
130134
for i := 0; i < webhookParallelism; i++ {
131135
a.Add(1)
132136
go func() {
@@ -136,7 +140,7 @@ func (a *ArgoCDWebhookHandler) startWorkerPool(webhookParallelism int) {
136140
if !ok {
137141
return
138142
}
139-
a.HandleEvent(payload)
143+
guard.RecoverAndLog(func() { a.HandleEvent(payload) }, compLog, panicMsgServer)
140144
}
141145
}()
142146
}

0 commit comments

Comments
 (0)