Skip to content

Commit 93e110a

Browse files
feat(agent): add retry mechanism (#245)
* refactor: move Config from agent to config package
* refactor(agent): create a config struct to store agent's configurations
* feat(agent): add retry mechanism for sink
* feat(agent): log retry attempts
* feat(agent): change retry interval strategy to become exponential
* feat: make agent retry configurable
* feat(columbus): retry if columbus returns 5** status code
* feat(agent): skip record instead of returning error on failed sink
* refactor: change RetryTimes to MaxRetries
* refactor(agent): move retry func to a struct
* feat(agent): make skipping record on sink error configurable
1 parent b343574 commit 93e110a

File tree

13 files changed

+400
-55
lines changed

13 files changed

+400
-55
lines changed

agent/agent.go

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,19 +25,26 @@ type Agent struct {
2525
sinkFactory *registry.SinkFactory
2626
monitor Monitor
2727
logger log.Logger
28+
retrier *retrier
29+
stopOnSinkError bool
2830
}
2931

3032
// NewAgent returns an Agent with plugin factories.
31-
func NewAgent(ef *registry.ExtractorFactory, pf *registry.ProcessorFactory, sf *registry.SinkFactory, mt Monitor, logger log.Logger) *Agent {
33+
func NewAgent(config Config) *Agent {
34+
mt := config.Monitor
3235
if isNilMonitor(mt) {
3336
mt = new(defaultMonitor)
3437
}
38+
39+
retrier := newRetrier(config.MaxRetries, config.RetryInitialInterval)
3540
return &Agent{
36-
extractorFactory: ef,
37-
processorFactory: pf,
38-
sinkFactory: sf,
41+
extractorFactory: config.ExtractorFactory,
42+
processorFactory: config.ProcessorFactory,
43+
sinkFactory: config.SinkFactory,
44+
stopOnSinkError: config.StopOnSinkError,
3945
monitor: mt,
40-
logger: logger,
46+
logger: config.Logger,
47+
retrier: retrier,
4148
}
4249
}
4350

@@ -234,14 +241,29 @@ func (r *Agent) setupSink(ctx context.Context, sr recipe.SinkRecipe, stream *str
234241
return
235242
}
236243

237-
stream.subscribe(func(records []models.Record) (err error) {
238-
err = sink.Sink(ctx, records)
244+
retryNotification := func(e error, d time.Duration) {
245+
r.logger.Info(
246+
fmt.Sprintf("retrying sink in %s", d), // %s prints the Duration with units (e.g. "2s") instead of a raw nanosecond count
247+
"sink", sr.Name,
248+
"error", e.Error())
249+
}
250+
stream.subscribe(func(records []models.Record) error {
251+
err := r.retrier.retry(func() error {
252+
err := sink.Sink(ctx, records)
253+
return err
254+
}, retryNotification)
255+
256+
// once retries are exhausted the error is logged; it is skipped unless stopOnSinkError is set
239257
if err != nil {
240-
err = errors.Wrapf(err, "error running sink \"%s\"", sr.Name)
241-
return
258+
r.logger.Error("error running sink", "sink", sr.Name, "error", err.Error())
259+
if !r.stopOnSinkError {
260+
err = nil
261+
}
242262
}
243263

244-
return
264+
// TODO: create a new error to signal stopping stream.
265+
// err is nil here (so the stream won't stop) unless stopOnSinkError is set.
266+
return err
245267
}, defaultBatchSize)
246268

247269
return

agent/agent_test.go

Lines changed: 172 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"errors"
66
"testing"
7+
"time"
78

89
"github.com/odpf/meteor/agent"
910
"github.com/odpf/meteor/models"
@@ -52,7 +53,12 @@ var finalData = []models.Record{
5253

5354
func TestRunnerRun(t *testing.T) {
5455
t.Run("should return run", func(t *testing.T) {
55-
r := agent.NewAgent(registry.NewExtractorFactory(), registry.NewProcessorFactory(), registry.NewSinkFactory(), nil, test.Logger)
56+
r := agent.NewAgent(agent.Config{
57+
ExtractorFactory: registry.NewExtractorFactory(),
58+
ProcessorFactory: registry.NewProcessorFactory(),
59+
SinkFactory: registry.NewSinkFactory(),
60+
Logger: test.Logger,
61+
})
5662
run := r.Run(validRecipe)
5763
assert.IsType(t, agent.Run{}, run)
5864
assert.Equal(t, validRecipe, run.Recipe)
@@ -67,7 +73,12 @@ func TestRunnerRun(t *testing.T) {
6773
sf := registry.NewSinkFactory()
6874
sf.Register("test-sink", newSink(sink))
6975

70-
r := agent.NewAgent(registry.NewExtractorFactory(), pf, sf, nil, test.Logger)
76+
r := agent.NewAgent(agent.Config{
77+
ExtractorFactory: registry.NewExtractorFactory(),
78+
ProcessorFactory: pf,
79+
SinkFactory: sf,
80+
Logger: test.Logger,
81+
})
7182
run := r.Run(validRecipe)
7283
assert.Error(t, run.Error)
7384
})
@@ -83,7 +94,12 @@ func TestRunnerRun(t *testing.T) {
8394
sf := registry.NewSinkFactory()
8495
sf.Register("test-sink", newSink(sink))
8596

86-
r := agent.NewAgent(ef, registry.NewProcessorFactory(), sf, nil, test.Logger)
97+
r := agent.NewAgent(agent.Config{
98+
ExtractorFactory: ef,
99+
ProcessorFactory: registry.NewProcessorFactory(),
100+
SinkFactory: sf,
101+
Logger: test.Logger,
102+
})
87103
run := r.Run(validRecipe)
88104
assert.Error(t, run.Error)
89105
})
@@ -101,7 +117,12 @@ func TestRunnerRun(t *testing.T) {
101117
pf := registry.NewProcessorFactory()
102118
pf.Register("test-processor", newProcessor(proc))
103119

104-
r := agent.NewAgent(ef, pf, registry.NewSinkFactory(), nil, test.Logger)
120+
r := agent.NewAgent(agent.Config{
121+
ExtractorFactory: ef,
122+
ProcessorFactory: pf,
123+
SinkFactory: registry.NewSinkFactory(),
124+
Logger: test.Logger,
125+
})
105126
run := r.Run(validRecipe)
106127
assert.Error(t, run.Error)
107128
})
@@ -121,7 +142,12 @@ func TestRunnerRun(t *testing.T) {
121142
sf := registry.NewSinkFactory()
122143
sf.Register("test-sink", newSink(sink))
123144

124-
r := agent.NewAgent(ef, pf, sf, nil, test.Logger)
145+
r := agent.NewAgent(agent.Config{
146+
ExtractorFactory: ef,
147+
ProcessorFactory: pf,
148+
SinkFactory: sf,
149+
Logger: test.Logger,
150+
})
125151
run := r.Run(validRecipe)
126152
assert.Error(t, run.Error)
127153
})
@@ -143,7 +169,12 @@ func TestRunnerRun(t *testing.T) {
143169
sf := registry.NewSinkFactory()
144170
sf.Register("test-sink", newSink(sink))
145171

146-
r := agent.NewAgent(ef, pf, sf, nil, test.Logger)
172+
r := agent.NewAgent(agent.Config{
173+
ExtractorFactory: ef,
174+
ProcessorFactory: pf,
175+
SinkFactory: sf,
176+
Logger: test.Logger,
177+
})
147178
run := r.Run(validRecipe)
148179
assert.Error(t, run.Error)
149180
})
@@ -167,7 +198,12 @@ func TestRunnerRun(t *testing.T) {
167198
sf := registry.NewSinkFactory()
168199
sf.Register("test-sink", newSink(sink))
169200

170-
r := agent.NewAgent(ef, pf, sf, nil, test.Logger)
201+
r := agent.NewAgent(agent.Config{
202+
ExtractorFactory: ef,
203+
ProcessorFactory: pf,
204+
SinkFactory: sf,
205+
Logger: test.Logger,
206+
})
171207
run := r.Run(validRecipe)
172208
assert.Error(t, run.Error)
173209
})
@@ -191,7 +227,12 @@ func TestRunnerRun(t *testing.T) {
191227
sf := registry.NewSinkFactory()
192228
sf.Register("test-sink", newSink(sink))
193229

194-
r := agent.NewAgent(ef, pf, sf, nil, test.Logger)
230+
r := agent.NewAgent(agent.Config{
231+
ExtractorFactory: ef,
232+
ProcessorFactory: pf,
233+
SinkFactory: sf,
234+
Logger: test.Logger,
235+
})
195236
run := r.Run(validRecipe)
196237
assert.Error(t, run.Error)
197238
})
@@ -214,7 +255,12 @@ func TestRunnerRun(t *testing.T) {
214255
sf := registry.NewSinkFactory()
215256
sf.Register("test-sink", newSink(sink))
216257

217-
r := agent.NewAgent(ef, pf, sf, nil, test.Logger)
258+
r := agent.NewAgent(agent.Config{
259+
ExtractorFactory: ef,
260+
ProcessorFactory: pf,
261+
SinkFactory: sf,
262+
Logger: test.Logger,
263+
})
218264
run := r.Run(validRecipe)
219265
assert.Error(t, run.Error)
220266
})
@@ -244,7 +290,12 @@ func TestRunnerRun(t *testing.T) {
244290
sf := registry.NewSinkFactory()
245291
sf.Register("test-sink", newSink(sink))
246292

247-
r := agent.NewAgent(ef, pf, sf, nil, test.Logger)
293+
r := agent.NewAgent(agent.Config{
294+
ExtractorFactory: ef,
295+
ProcessorFactory: pf,
296+
SinkFactory: sf,
297+
Logger: test.Logger,
298+
})
248299
run := r.Run(validRecipe)
249300
assert.Error(t, run.Error)
250301
})
@@ -273,12 +324,17 @@ func TestRunnerRun(t *testing.T) {
273324
sf := registry.NewSinkFactory()
274325
sf.Register("test-sink", newSink(sink))
275326

276-
r := agent.NewAgent(ef, pf, sf, nil, test.Logger)
327+
r := agent.NewAgent(agent.Config{
328+
ExtractorFactory: ef,
329+
ProcessorFactory: pf,
330+
SinkFactory: sf,
331+
Logger: test.Logger,
332+
})
277333
run := r.Run(validRecipe)
278334
assert.Error(t, run.Error)
279335
})
280336

281-
t.Run("should return error when sink fails", func(t *testing.T) {
337+
t.Run("should not return error when sink fails", func(t *testing.T) {
282338
data := []models.Record{
283339
models.NewRecord(&assets.Table{}),
284340
}
@@ -304,7 +360,49 @@ func TestRunnerRun(t *testing.T) {
304360
sf := registry.NewSinkFactory()
305361
sf.Register("test-sink", newSink(sink))
306362

307-
r := agent.NewAgent(ef, pf, sf, nil, test.Logger)
363+
r := agent.NewAgent(agent.Config{
364+
ExtractorFactory: ef,
365+
ProcessorFactory: pf,
366+
SinkFactory: sf,
367+
Logger: test.Logger,
368+
})
369+
run := r.Run(validRecipe)
370+
assert.NoError(t, run.Error)
371+
})
372+
373+
t.Run("should return error when sink fails if StopOnSinkError is true", func(t *testing.T) {
374+
data := []models.Record{
375+
models.NewRecord(&assets.Table{}),
376+
}
377+
378+
extr := mocks.NewExtractor()
379+
extr.SetEmit(data)
380+
extr.On("Init", mockCtx, validRecipe.Source.Config).Return(nil).Once()
381+
extr.On("Extract", mockCtx, mock.AnythingOfType("plugins.Emit")).Return(nil)
382+
ef := registry.NewExtractorFactory()
383+
ef.Register("test-extractor", newExtractor(extr))
384+
385+
proc := mocks.NewProcessor()
386+
proc.On("Init", mockCtx, validRecipe.Processors[0].Config).Return(nil).Once()
387+
proc.On("Process", mockCtx, data[0]).Return(data[0], nil)
388+
defer proc.AssertExpectations(t)
389+
pf := registry.NewProcessorFactory()
390+
pf.Register("test-processor", newProcessor(proc))
391+
392+
sink := mocks.NewSink()
393+
sink.On("Init", mockCtx, validRecipe.Sinks[0].Config).Return(nil).Once()
394+
sink.On("Sink", mockCtx, data).Return(errors.New("some error"))
395+
defer sink.AssertExpectations(t)
396+
sf := registry.NewSinkFactory()
397+
sf.Register("test-sink", newSink(sink))
398+
399+
r := agent.NewAgent(agent.Config{
400+
ExtractorFactory: ef,
401+
ProcessorFactory: pf,
402+
SinkFactory: sf,
403+
Logger: test.Logger,
404+
StopOnSinkError: true,
405+
})
308406
run := r.Run(validRecipe)
309407
assert.Error(t, run.Error)
310408
})
@@ -335,7 +433,12 @@ func TestRunnerRun(t *testing.T) {
335433
sf := registry.NewSinkFactory()
336434
sf.Register("test-sink", newSink(sink))
337435

338-
r := agent.NewAgent(ef, pf, sf, nil, test.Logger)
436+
r := agent.NewAgent(agent.Config{
437+
ExtractorFactory: ef,
438+
ProcessorFactory: pf,
439+
SinkFactory: sf,
440+
Logger: test.Logger,
441+
})
339442
run := r.Run(validRecipe)
340443
assert.NoError(t, run.Error)
341444
assert.Equal(t, validRecipe, run.Recipe)
@@ -372,7 +475,55 @@ func TestRunnerRun(t *testing.T) {
372475
monitor.On("RecordRun", monitor_run).Once()
373476
defer monitor.AssertExpectations(t)
374477

375-
r := agent.NewAgent(ef, pf, sf, monitor, test.Logger)
478+
r := agent.NewAgent(agent.Config{
479+
ExtractorFactory: ef,
480+
ProcessorFactory: pf,
481+
SinkFactory: sf,
482+
Monitor: monitor,
483+
Logger: test.Logger,
484+
})
485+
run := r.Run(validRecipe)
486+
assert.NoError(t, run.Error)
487+
assert.Equal(t, validRecipe, run.Recipe)
488+
})
489+
490+
t.Run("should retry if sink returns retry error", func(t *testing.T) {
491+
err := errors.New("some-error")
492+
data := []models.Record{
493+
models.NewRecord(&assets.Table{}),
494+
}
495+
496+
extr := mocks.NewExtractor()
497+
extr.SetEmit(data)
498+
extr.On("Init", mockCtx, validRecipe.Source.Config).Return(nil).Once()
499+
extr.On("Extract", mockCtx, mock.AnythingOfType("plugins.Emit")).Return(nil)
500+
ef := registry.NewExtractorFactory()
501+
ef.Register("test-extractor", newExtractor(extr))
502+
503+
proc := mocks.NewProcessor()
504+
proc.On("Init", mockCtx, validRecipe.Processors[0].Config).Return(nil).Once()
505+
proc.On("Process", mockCtx, data[0]).Return(data[0], nil)
506+
defer proc.AssertExpectations(t)
507+
pf := registry.NewProcessorFactory()
508+
pf.Register("test-processor", newProcessor(proc))
509+
510+
sink := mocks.NewSink()
511+
sink.On("Init", mockCtx, validRecipe.Sinks[0].Config).Return(nil).Once()
512+
sink.On("Sink", mockCtx, data).Return(plugins.NewRetryError(err)).Once()
513+
sink.On("Sink", mockCtx, data).Return(plugins.NewRetryError(err)).Once()
514+
sink.On("Sink", mockCtx, data).Return(nil)
515+
defer sink.AssertExpectations(t)
516+
sf := registry.NewSinkFactory()
517+
sf.Register("test-sink", newSink(sink))
518+
519+
r := agent.NewAgent(agent.Config{
520+
ExtractorFactory: ef,
521+
ProcessorFactory: pf,
522+
SinkFactory: sf,
523+
Logger: test.Logger,
524+
MaxRetries: 2, // need to retry "at least" 2 times since Sink returns RetryError twice
525+
RetryInitialInterval: 1 * time.Millisecond, // this is to override default retry interval to reduce test time
526+
})
376527
run := r.Run(validRecipe)
377528
assert.NoError(t, run.Error)
378529
assert.Equal(t, validRecipe, run.Recipe)
@@ -408,7 +559,12 @@ func TestRunnerRunMultiple(t *testing.T) {
408559
sf := registry.NewSinkFactory()
409560
sf.Register("test-sink", newSink(sink))
410561

411-
r := agent.NewAgent(ef, pf, sf, nil, test.Logger)
562+
r := agent.NewAgent(agent.Config{
563+
ExtractorFactory: ef,
564+
ProcessorFactory: pf,
565+
SinkFactory: sf,
566+
Logger: test.Logger,
567+
})
412568
runs := r.RunMultiple(recipeList)
413569

414570
assert.Len(t, runs, len(recipeList))

agent/config.go

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,19 @@
11
package agent
22

3-
// Config contains the configuration for the agent.
3+
import (
4+
"time"
5+
6+
"github.com/odpf/meteor/registry"
7+
"github.com/odpf/salt/log"
8+
)
9+
410
type Config struct {
5-
LogLevel string `mapstructure:"LOG_LEVEL" default:"info"`
6-
StatsdEnabled bool `mapstructure:"STATSD_ENABLED" default:"false"`
7-
StatsdHost string `mapstructure:"STATSD_HOST" default:"localhost:8125"`
8-
StatsdPrefix string `mapstructure:"STATSD_PREFIX" default:"meteor"`
11+
ExtractorFactory *registry.ExtractorFactory
12+
ProcessorFactory *registry.ProcessorFactory
13+
SinkFactory *registry.SinkFactory
14+
Monitor Monitor
15+
Logger log.Logger
16+
MaxRetries int
17+
RetryInitialInterval time.Duration
18+
StopOnSinkError bool
919
}

0 commit comments

Comments
 (0)