Skip to content

Commit 170d9b9

Browse files
authored
Merge pull request #13508 from serathius/checkpoints-fix
Lease Checkpoints fix
2 parents 3e391f4 + 48a7aab commit 170d9b9

14 files changed

Lines changed: 326 additions & 77 deletions

File tree

CHANGELOG-3.5.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ See [code changes](https://github.com/etcd-io/etcd/compare/v3.5.1...v3.5.2) and
1414

1515
### etcd server
1616
- Fix [exclude the same alarm type activated by multiple peers](https://github.com/etcd-io/etcd/pull/13476).
17+
- Add [`etcd --experimental-enable-lease-checkpoint-persist`](https://github.com/etcd-io/etcd/pull/13508) flag to enable checkpoint persisting.
18+
- Fix [lease checkpoints not preventing the remaining TTL from being reset on leader change](https://github.com/etcd-io/etcd/pull/13508); requires checkpoint persisting to be enabled.
1719

1820
<hr>
1921

CHANGELOG-3.6.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,11 @@ See [code changes](https://github.com/etcd-io/etcd/compare/v3.5.0...v3.6.0).
3333

3434
- Add [`etcd --log-format`](https://github.com/etcd-io/etcd/pull/13339) flag to support log format.
3535
- Add [`etcd --experimental-max-learners`](https://github.com/etcd-io/etcd/pull/13377) flag to allow configuration of learner max membership.
36+
- Add [`etcd --experimental-enable-lease-checkpoint-persist`](https://github.com/etcd-io/etcd/pull/13508) flag to handle upgrade from v3.5.2 clusters with this feature enabled.
3637
- Fix [non mutating requests pass through quotaKVServer when NOSPACE](https://github.com/etcd-io/etcd/pull/13435)
3738
- Fix [exclude the same alarm type activated by multiple peers](https://github.com/etcd-io/etcd/pull/13467).
3839
- Fix [Provide a better liveness probe for when etcd runs as a Kubernetes pod](https://github.com/etcd-io/etcd/pull/13399)
40+
- Fix [lease checkpoints not preventing the remaining TTL from being reset on leader change](https://github.com/etcd-io/etcd/pull/13508).
3941

4042
### tools/benchmark
4143

server/config/config.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,10 +149,12 @@ type ServerConfig struct {
149149

150150
ForceNewCluster bool
151151

152-
// EnableLeaseCheckpoint enables primary lessor to persist lease remainingTTL to prevent indefinite auto-renewal of long lived leases.
152+
// EnableLeaseCheckpoint enables leader to send regular checkpoints to other members to prevent reset of remaining TTL on leader change.
153153
EnableLeaseCheckpoint bool
154154
// LeaseCheckpointInterval time.Duration is the wait duration between lease checkpoints.
155155
LeaseCheckpointInterval time.Duration
156+
// LeaseCheckpointPersist enables persisting remainingTTL to prevent indefinite auto-renewal of long lived leases. Always enabled in v3.6. Should be used to ensure smooth upgrade from v3.5 clusters with this feature enabled.
157+
LeaseCheckpointPersist bool
156158

157159
EnableGRPCGateway bool
158160

server/embed/config.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -315,9 +315,14 @@ type Config struct {
315315
// Deprecated in v3.5.
316316
// TODO: Delete in v3.6 (https://github.com/etcd-io/etcd/issues/12913)
317317
ExperimentalEnableV2V3 string `json:"experimental-enable-v2v3"`
318-
// ExperimentalEnableLeaseCheckpoint enables primary lessor to persist lease remainingTTL to prevent indefinite auto-renewal of long lived leases.
318+
// ExperimentalEnableLeaseCheckpoint enables leader to send regular checkpoints to other members to prevent reset of remaining TTL on leader change.
319319
ExperimentalEnableLeaseCheckpoint bool `json:"experimental-enable-lease-checkpoint"`
320-
ExperimentalCompactionBatchLimit int `json:"experimental-compaction-batch-limit"`
320+
// ExperimentalEnableLeaseCheckpointPersist enables persisting remainingTTL to prevent indefinite auto-renewal of long lived leases. Always enabled in v3.6. Should be used to ensure smooth upgrade from v3.5 clusters with this feature enabled.
321+
// Requires experimental-enable-lease-checkpoint to be enabled.
322+
// Deprecated in v3.6.
323+
// TODO: Delete in v3.7
324+
ExperimentalEnableLeaseCheckpointPersist bool `json:"experimental-enable-lease-checkpoint-persist"`
325+
ExperimentalCompactionBatchLimit int `json:"experimental-compaction-batch-limit"`
321326
// ExperimentalCompactionSleepInterval is the sleep interval between every etcd compaction loop.
322327
ExperimentalCompactionSleepInterval time.Duration `json:"experimental-compaction-sleep-interval"`
323328
ExperimentalWatchProgressNotifyInterval time.Duration `json:"experimental-watch-progress-notify-interval"`
@@ -704,6 +709,14 @@ func (cfg *Config) Validate() error {
704709
}
705710
}
706711

712+
if !cfg.ExperimentalEnableLeaseCheckpointPersist && cfg.ExperimentalEnableLeaseCheckpoint {
713+
cfg.logger.Warn("Detected that checkpointing is enabled without persistence. Consider enabling experimental-enable-lease-checkpoint-persist")
714+
}
715+
716+
if cfg.ExperimentalEnableLeaseCheckpointPersist && !cfg.ExperimentalEnableLeaseCheckpoint {
717+
return fmt.Errorf("setting experimental-enable-lease-checkpoint-persist requires experimental-enable-lease-checkpoint")
718+
}
719+
707720
return nil
708721
}
709722

server/embed/config_test.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,56 @@ func TestPeerURLsMapAndTokenFromSRV(t *testing.T) {
297297
}
298298
}
299299

300+
func TestLeaseCheckpointValidate(t *testing.T) {
301+
tcs := []struct {
302+
name string
303+
configFunc func() Config
304+
expectError bool
305+
}{
306+
{
307+
name: "Default config should pass",
308+
configFunc: func() Config {
309+
return *NewConfig()
310+
},
311+
},
312+
{
313+
name: "Enabling checkpoint leases should pass",
314+
configFunc: func() Config {
315+
cfg := *NewConfig()
316+
cfg.ExperimentalEnableLeaseCheckpoint = true
317+
return cfg
318+
},
319+
},
320+
{
321+
name: "Enabling checkpoint leases and persist should pass",
322+
configFunc: func() Config {
323+
cfg := *NewConfig()
324+
cfg.ExperimentalEnableLeaseCheckpoint = true
325+
cfg.ExperimentalEnableLeaseCheckpointPersist = true
326+
return cfg
327+
},
328+
},
329+
{
330+
name: "Enabling checkpoint leases persist without checkpointing itself should fail",
331+
configFunc: func() Config {
332+
cfg := *NewConfig()
333+
cfg.ExperimentalEnableLeaseCheckpointPersist = true
334+
return cfg
335+
},
336+
expectError: true,
337+
},
338+
}
339+
for _, tc := range tcs {
340+
t.Run(tc.name, func(t *testing.T) {
341+
cfg := tc.configFunc()
342+
err := cfg.Validate()
343+
if (err != nil) != tc.expectError {
344+
t.Errorf("config.Validate() = %q, expected error: %v", err, tc.expectError)
345+
}
346+
})
347+
}
348+
}
349+
300350
func TestLogRotation(t *testing.T) {
301351
tests := []struct {
302352
name string

server/embed/etcd.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) {
210210
ExperimentalEnableDistributedTracing: cfg.ExperimentalEnableDistributedTracing,
211211
UnsafeNoFsync: cfg.UnsafeNoFsync,
212212
EnableLeaseCheckpoint: cfg.ExperimentalEnableLeaseCheckpoint,
213+
LeaseCheckpointPersist: cfg.ExperimentalEnableLeaseCheckpointPersist,
213214
CompactionBatchLimit: cfg.ExperimentalCompactionBatchLimit,
214215
CompactionSleepInterval: cfg.ExperimentalCompactionSleepInterval,
215216
WatchProgressNotifyInterval: cfg.ExperimentalWatchProgressNotifyInterval,

server/etcdmain/config.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,9 @@ func newConfig() *config {
282282
fs.BoolVar(&cfg.ec.ExperimentalInitialCorruptCheck, "experimental-initial-corrupt-check", cfg.ec.ExperimentalInitialCorruptCheck, "Enable to check data corruption before serving any client/peer traffic.")
283283
fs.DurationVar(&cfg.ec.ExperimentalCorruptCheckTime, "experimental-corrupt-check-time", cfg.ec.ExperimentalCorruptCheckTime, "Duration of time between cluster corruption check passes.")
284284

285-
fs.BoolVar(&cfg.ec.ExperimentalEnableLeaseCheckpoint, "experimental-enable-lease-checkpoint", false, "Enable to persist lease remaining TTL to prevent indefinite auto-renewal of long lived leases.")
285+
fs.BoolVar(&cfg.ec.ExperimentalEnableLeaseCheckpoint, "experimental-enable-lease-checkpoint", false, "Enable leader to send regular checkpoints to other members to prevent reset of remaining TTL on leader change.")
286+
// TODO: delete in v3.7
287+
fs.BoolVar(&cfg.ec.ExperimentalEnableLeaseCheckpointPersist, "experimental-enable-lease-checkpoint-persist", false, "Enable persisting remainingTTL to prevent indefinite auto-renewal of long lived leases. Always enabled in v3.6. Should be used to ensure smooth upgrade from v3.5 clusters with this feature enabled. Requires experimental-enable-lease-checkpoint to be enabled.")
286288
fs.IntVar(&cfg.ec.ExperimentalCompactionBatchLimit, "experimental-compaction-batch-limit", cfg.ec.ExperimentalCompactionBatchLimit, "Sets the maximum revisions deleted in each compaction batch.")
287289
fs.DurationVar(&cfg.ec.ExperimentalCompactionSleepInterval, "experimental-compaction-sleep-interval", cfg.ec.ExperimentalCompactionSleepInterval, "Sets the sleep interval between each compaction batch.")
288290
fs.DurationVar(&cfg.ec.ExperimentalWatchProgressNotifyInterval, "experimental-watch-progress-notify-interval", cfg.ec.ExperimentalWatchProgressNotifyInterval, "Duration of periodic watch progress notifications.")

server/etcdserver/server.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,9 +344,10 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) {
344344

345345
// always recover lessor before kv. When we recover the mvcc.KV it will reattach keys to its leases.
346346
// If we recover mvcc.KV first, it will attach the keys to the wrong lessor before it recovers.
347-
srv.lessor = lease.NewLessor(srv.Logger(), srv.be, lease.LessorConfig{
347+
srv.lessor = lease.NewLessor(srv.Logger(), srv.be, srv.cluster, lease.LessorConfig{
348348
MinLeaseTTL: int64(math.Ceil(minTTL.Seconds())),
349349
CheckpointInterval: cfg.LeaseCheckpointInterval,
350+
CheckpointPersist: cfg.LeaseCheckpointPersist,
350351
ExpiredLeasesRetryInterval: srv.Cfg.ReqTimeout(),
351352
})
352353

server/lease/leasehttp/http_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ func TestRenewHTTP(t *testing.T) {
3131
be, _ := betesting.NewTmpBackend(t, time.Hour, 10000)
3232
defer betesting.Close(t, be)
3333

34-
le := lease.NewLessor(lg, be, lease.LessorConfig{MinLeaseTTL: int64(5)})
34+
le := lease.NewLessor(lg, be, nil, lease.LessorConfig{MinLeaseTTL: int64(5)})
3535
le.Promote(time.Second)
3636
l, err := le.Grant(1, int64(5))
3737
if err != nil {
@@ -55,7 +55,7 @@ func TestTimeToLiveHTTP(t *testing.T) {
5555
be, _ := betesting.NewTmpBackend(t, time.Hour, 10000)
5656
defer betesting.Close(t, be)
5757

58-
le := lease.NewLessor(lg, be, lease.LessorConfig{MinLeaseTTL: int64(5)})
58+
le := lease.NewLessor(lg, be, nil, lease.LessorConfig{MinLeaseTTL: int64(5)})
5959
le.Promote(time.Second)
6060
l, err := le.Grant(1, int64(5))
6161
if err != nil {
@@ -96,7 +96,7 @@ func testApplyTimeout(t *testing.T, f func(*lease.Lease, string) error) {
9696
be, _ := betesting.NewTmpBackend(t, time.Hour, 10000)
9797
defer betesting.Close(t, be)
9898

99-
le := lease.NewLessor(lg, be, lease.LessorConfig{MinLeaseTTL: int64(5)})
99+
le := lease.NewLessor(lg, be, nil, lease.LessorConfig{MinLeaseTTL: int64(5)})
100100
le.Promote(time.Second)
101101
l, err := le.Grant(1, int64(5))
102102
if err != nil {

server/lease/lessor.go

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"sync"
2525
"time"
2626

27+
"github.com/coreos/go-semver/semver"
2728
pb "go.etcd.io/etcd/api/v3/etcdserverpb"
2829
"go.etcd.io/etcd/server/v3/lease/leasepb"
2930
"go.etcd.io/etcd/server/v3/storage/backend"
@@ -37,6 +38,8 @@ const NoLease = LeaseID(0)
3738
// MaxLeaseTTL is the maximum lease TTL value
3839
const MaxLeaseTTL = 9000000000
3940

41+
var v3_6 = semver.Version{Major: 3, Minor: 6}
42+
4043
var (
4144
forever = time.Time{}
4245

@@ -180,19 +183,29 @@ type lessor struct {
180183
checkpointInterval time.Duration
181184
// the interval to check if the expired lease is revoked
182185
expiredLeaseRetryInterval time.Duration
186+
// whether lessor should always persist remaining TTL (always enabled in v3.6).
187+
checkpointPersist bool
188+
// cluster is used to adapt lessor logic based on cluster version
189+
cluster cluster
190+
}
191+
192+
// cluster is the view of the cluster that the lessor depends on; it is used
// to adapt lessor logic based on the cluster version.
type cluster interface {
193+
// Version is the cluster-wide minimum major.minor version.
194+
Version() *semver.Version
183195
}
184196

185197
// LessorConfig carries the tunable settings passed to NewLessor.
type LessorConfig struct {
186198
// MinLeaseTTL is the minimum lease TTL; the server populates it from a
// duration converted to whole seconds.
MinLeaseTTL int64
187199
// CheckpointInterval is the wait duration between lease checkpoints.
CheckpointInterval time.Duration
188200
// ExpiredLeasesRetryInterval is the interval to check if an expired lease is revoked.
ExpiredLeasesRetryInterval time.Duration
201+
// CheckpointPersist makes the lessor always persist the remaining TTL on
// checkpoint (always enabled in v3.6).
CheckpointPersist bool
189202
}
190203

191-
func NewLessor(lg *zap.Logger, b backend.Backend, cfg LessorConfig) Lessor {
192-
return newLessor(lg, b, cfg)
204+
// NewLessor returns a Lessor built by newLessor from the given logger,
// backend, cluster and configuration.
func NewLessor(lg *zap.Logger, b backend.Backend, cluster cluster, cfg LessorConfig) Lessor {
205+
return newLessor(lg, b, cluster, cfg)
193206
}
194207

195-
func newLessor(lg *zap.Logger, b backend.Backend, cfg LessorConfig) *lessor {
208+
func newLessor(lg *zap.Logger, b backend.Backend, cluster cluster, cfg LessorConfig) *lessor {
196209
checkpointInterval := cfg.CheckpointInterval
197210
expiredLeaseRetryInterval := cfg.ExpiredLeasesRetryInterval
198211
if checkpointInterval == 0 {
@@ -210,11 +223,13 @@ func newLessor(lg *zap.Logger, b backend.Backend, cfg LessorConfig) *lessor {
210223
minLeaseTTL: cfg.MinLeaseTTL,
211224
checkpointInterval: checkpointInterval,
212225
expiredLeaseRetryInterval: expiredLeaseRetryInterval,
226+
checkpointPersist: cfg.CheckpointPersist,
213227
// expiredC is a small buffered chan to avoid unnecessary blocking.
214228
expiredC: make(chan []*Lease, 16),
215229
stopC: make(chan struct{}),
216230
doneC: make(chan struct{}),
217231
lg: lg,
232+
cluster: cluster,
218233
}
219234
l.initAndRecover()
220235

@@ -351,6 +366,9 @@ func (le *lessor) Checkpoint(id LeaseID, remainingTTL int64) error {
351366
if l, ok := le.leaseMap[id]; ok {
352367
// when checkpointing, we only update the remainingTTL, Promote is responsible for applying this to lease expiry
353368
l.remainingTTL = remainingTTL
369+
if le.shouldPersistCheckpoints() {
370+
l.persistTo(le.b)
371+
}
354372
if le.isPrimary() {
355373
// schedule the next checkpoint as needed
356374
le.scheduleCheckpointIfNeeded(l)
@@ -359,6 +377,15 @@ func (le *lessor) Checkpoint(id LeaseID, remainingTTL int64) error {
359377
return nil
360378
}
361379

380+
func (le *lessor) shouldPersistCheckpoints() bool {
381+
cv := le.cluster.Version()
382+
return le.checkpointPersist || (cv != nil && greaterOrEqual(*cv, v3_6))
383+
}
384+
385+
func greaterOrEqual(first, second semver.Version) bool {
386+
return !first.LessThan(second)
387+
}
388+
362389
// Renew renews an existing lease. If the given lease does not exist or
363390
// has expired, an error will be returned.
364391
func (le *lessor) Renew(id LeaseID) (int64, error) {
@@ -446,6 +473,7 @@ func (le *lessor) Promote(extend time.Duration) {
446473
l.refresh(extend)
447474
item := &LeaseWithTime{id: l.ID, time: l.expiry}
448475
le.leaseExpiredNotifier.RegisterOrUpdate(item)
476+
le.scheduleCheckpointIfNeeded(l)
449477
}
450478

451479
if len(le.leaseMap) < leaseRevokeRate {
@@ -783,9 +811,10 @@ func (le *lessor) initAndRecover() {
783811
ttl: lpb.TTL,
784812
// itemSet will be filled in when recover key-value pairs
785813
// set expiry to forever, refresh when promoted
786-
itemSet: make(map[LeaseItem]struct{}),
787-
expiry: forever,
788-
revokec: make(chan struct{}),
814+
itemSet: make(map[LeaseItem]struct{}),
815+
expiry: forever,
816+
revokec: make(chan struct{}),
817+
remainingTTL: lpb.RemainingTTL,
789818
}
790819
}
791820
le.leaseExpiredNotifier.Init()

0 commit comments

Comments
 (0)