diff --git a/integration-cli/docker_api_swarm_test.go b/integration-cli/docker_api_swarm_test.go index e7d1b983322fc..7b498d5ea9e6f 100644 --- a/integration-cli/docker_api_swarm_test.go +++ b/integration-cli/docker_api_swarm_test.go @@ -27,6 +27,7 @@ import ( "github.com/docker/docker/internal/test/request" "github.com/docker/swarmkit/ca" "github.com/go-check/check" + "github.com/pkg/errors" "gotest.tools/assert" is "gotest.tools/assert/cmp" ) @@ -313,13 +314,24 @@ func (s *DockerSwarmSuite) TestAPISwarmLeaderElection(c *check.C) { leader *daemon.Daemon // keep track of leader followers []*daemon.Daemon // keep track of followers ) + var lastErr error checkLeader := func(nodes ...*daemon.Daemon) checkF { return func(c *check.C) (interface{}, check.CommentInterface) { // clear these out before each run leader = nil followers = nil for _, d := range nodes { - if d.GetNode(c, d.NodeID()).ManagerStatus.Leader { + n := d.GetNode(c, d.NodeID(), func(err error) bool { + if strings.Contains(errors.Cause(err).Error(), context.DeadlineExceeded.Error()) || strings.Contains(err.Error(), "swarm does not have a leader") { + lastErr = err + return true + } + return false + }) + if n == nil { + return false, check.Commentf("failed to get node: %v", lastErr) + } + if n.ManagerStatus.Leader { leader = d } else { followers = append(followers, d) @@ -391,7 +403,7 @@ func (s *DockerSwarmSuite) TestAPISwarmRaftQuorum(c *check.C) { defer cli.Close() // d1 will eventually step down from leader because there is no longer an active quorum, wait for that to happen - waitAndAssert(c, defaultReconciliationTimeout, func(c *check.C) (interface{}, check.CommentInterface) { + waitAndAssert(c, defaultReconciliationTimeout*2, func(c *check.C) (interface{}, check.CommentInterface) { _, err := cli.ServiceCreate(context.Background(), service.Spec, types.ServiceCreateOptions{}) return err.Error(), nil }, checker.Contains, "Make sure more than half of the managers are online.") diff --git a/integration-cli/docker_cli_swarm_test.go b/integration-cli/docker_cli_swarm_test.go index 1b20cbf95d597..b5a4d31d2fe6d 100644 --- a/integration-cli/docker_cli_swarm_test.go +++ b/integration-cli/docker_cli_swarm_test.go @@ -1303,9 +1303,21 @@ func (s *DockerSwarmSuite) TestSwarmRotateUnlockKey(c *check.C) { c.Assert(getNodeStatus(c, d), checker.Equals, swarm.LocalNodeStateActive) - outs, err = d.Cmd("node", "ls") - assert.NilError(c, err) - c.Assert(outs, checker.Not(checker.Contains), "Swarm is encrypted and needs to be unlocked") + retry := 0 + for { + // an issue sometimes prevents leader to be available right away + outs, err = d.Cmd("node", "ls") + if err != nil && retry < 5 { + if strings.Contains(outs, "swarm does not have a leader") { + retry++ + time.Sleep(3 * time.Second) + continue + } + } + assert.NilError(c, err) + c.Assert(outs, checker.Not(checker.Contains), "Swarm is encrypted and needs to be unlocked") + break + } unlockKey = newUnlockKey } @@ -1383,9 +1395,21 @@ func (s *DockerSwarmSuite) TestSwarmClusterRotateUnlockKey(c *check.C) { c.Assert(getNodeStatus(c, d), checker.Equals, swarm.LocalNodeStateActive) - outs, err = d.Cmd("node", "ls") - c.Assert(err, checker.IsNil, check.Commentf("%s", outs)) - c.Assert(outs, checker.Not(checker.Contains), "Swarm is encrypted and needs to be unlocked") + retry := 0 + for { + // an issue sometimes prevents leader to be available right away + outs, err = d.Cmd("node", "ls") + if err != nil && retry < 5 { + if strings.Contains(outs, "swarm does not have a leader") { + retry++ + time.Sleep(3 * time.Second) + continue + } + } + c.Assert(err, checker.IsNil, check.Commentf("%s", outs)) + c.Assert(outs, checker.Not(checker.Contains), "Swarm is encrypted and needs to be unlocked") + break + } } unlockKey = newUnlockKey diff --git a/internal/test/daemon/node.go b/internal/test/daemon/node.go index 33dd365429531..d5f6f15263fa4 100644 --- a/internal/test/daemon/node.go +++ b/internal/test/daemon/node.go @@ -15,7 +15,7 @@ import ( type NodeConstructor func(*swarm.Node) // GetNode returns a swarm node identified by the specified id -func (d *Daemon) GetNode(t assert.TestingT, id string) *swarm.Node { +func (d *Daemon) GetNode(t assert.TestingT, id string, errCheck ...func(error) bool) *swarm.Node { if ht, ok := t.(test.HelperT); ok { ht.Helper() } @@ -23,6 +23,13 @@ func (d *Daemon) GetNode(t assert.TestingT, id string) *swarm.Node { defer cli.Close() node, _, err := cli.NodeInspectWithRaw(context.Background(), id) + if err != nil { + for _, f := range errCheck { + if f(err) { + return nil + } + } + } assert.NilError(t, err, "[%s] (*Daemon).GetNode: NodeInspectWithRaw(%q) failed", d.id, id) assert.Check(t, node.ID == id) return &node diff --git a/vendor.conf b/vendor.conf index 165cc5c42be6f..0464839dfc5a1 100644 --- a/vendor.conf +++ b/vendor.conf @@ -130,7 +130,7 @@ github.com/containerd/ttrpc 2a805f71863501300ae1976d29f0454ae003e85a github.com/gogo/googleapis 08a7655d27152912db7aaf4f983275eaf8d128ef # cluster -github.com/docker/swarmkit 142a73731c850daf24d32001aa2358b6ffe36eab # bump_v18.09 branch +github.com/docker/swarmkit 5c86095cef3ff480e69486da50f18fd1b3a0de78 # bump_v18.09 branch github.com/gogo/protobuf v1.0.0 github.com/cloudflare/cfssl 1.3.2 github.com/fernet/fernet-go 1b2437bc582b3cfbb341ee5a29f8ef5b42912ff2 diff --git a/vendor/github.com/docker/swarmkit/agent/agent.go b/vendor/github.com/docker/swarmkit/agent/agent.go index 743072f9dafea..58ebff5934ff0 100644 --- a/vendor/github.com/docker/swarmkit/agent/agent.go +++ b/vendor/github.com/docker/swarmkit/agent/agent.go @@ -575,7 +575,7 @@ func (a *Agent) nodeDescriptionWithHostname(ctx context.Context, tlsInfo *api.No // Override hostname and TLS info if desc != nil { - if a.config.Hostname != "" && desc != nil { + if a.config.Hostname != "" { desc.Hostname = a.config.Hostname } desc.TLSInfo = tlsInfo diff --git a/vendor/github.com/docker/swarmkit/agent/session.go b/vendor/github.com/docker/swarmkit/agent/session.go index 526953509b5bc..2e7f1b6a37081 100644 --- a/vendor/github.com/docker/swarmkit/agent/session.go +++ b/vendor/github.com/docker/swarmkit/agent/session.go @@ -3,6 +3,7 @@ package agent import ( "context" "errors" + "math" "sync" "time" @@ -64,6 +65,7 @@ func newSession(ctx context.Context, agent *Agent, delay time.Duration, sessionI cc, err := agent.config.ConnBroker.Select( grpc.WithTransportCredentials(agent.config.Credentials), grpc.WithTimeout(dispatcherRPCTimeout), + grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(math.MaxInt32)), ) if err != nil { @@ -136,7 +138,7 @@ func (s *session) start(ctx context.Context, description *api.NodeDescription) e // `ctx` is done and hence fail to propagate the timeout error to the agent. // If the error is not propogated to the agent, the agent will not close // the session or rebuild a new session. - sessionCtx, cancelSession := context.WithCancel(ctx) // nolint: vet + sessionCtx, cancelSession := context.WithCancel(ctx) //nolint:govet // Need to run Session in a goroutine since there's no way to set a // timeout for an individual Recv call in a stream. @@ -159,7 +161,7 @@ func (s *session) start(ctx context.Context, description *api.NodeDescription) e select { case err := <-errChan: if err != nil { - return err // nolint: vet + return err //nolint:govet } case <-time.After(dispatcherRPCTimeout): cancelSession() diff --git a/vendor/github.com/docker/swarmkit/agent/worker.go b/vendor/github.com/docker/swarmkit/agent/worker.go index efe538afa767f..212aa1e12f4ec 100644 --- a/vendor/github.com/docker/swarmkit/agent/worker.go +++ b/vendor/github.com/docker/swarmkit/agent/worker.go @@ -257,13 +257,11 @@ func reconcileTaskState(ctx context.Context, w *worker, assignments []*api.Assig } closeManager := func(tm *taskManager) { - go func(tm *taskManager) { - defer w.closers.Done() - // when a task is no longer assigned, we shutdown the task manager - if err := tm.Close(); err != nil { - log.G(ctx).WithError(err).Error("error closing task manager") - } - }(tm) + defer w.closers.Done() + // when a task is no longer assigned, we shutdown the task manager + if err := tm.Close(); err != nil { + log.G(ctx).WithError(err).Error("error closing task manager") + } // make an attempt at removing. this is best effort. any errors will be // retried by the reaper later. diff --git a/vendor/github.com/docker/swarmkit/api/dispatcher.pb.go b/vendor/github.com/docker/swarmkit/api/dispatcher.pb.go index cc443848deefc..f72d3d9995b5c 100644 --- a/vendor/github.com/docker/swarmkit/api/dispatcher.pb.go +++ b/vendor/github.com/docker/swarmkit/api/dispatcher.pb.go @@ -1664,7 +1664,7 @@ func (p *raftProxyDispatcherServer) Session(r *SessionRequest, stream Dispatcher } streamWrapper := Dispatcher_SessionServerWrapper{ Dispatcher_SessionServer: stream, - ctx: ctx, + ctx: ctx, } return p.local.Session(r, streamWrapper) } @@ -1785,7 +1785,7 @@ func (p *raftProxyDispatcherServer) Tasks(r *TasksRequest, stream Dispatcher_Tas } streamWrapper := Dispatcher_TasksServerWrapper{ Dispatcher_TasksServer: stream, - ctx: ctx, + ctx: ctx, } return p.local.Tasks(r, streamWrapper) } @@ -1836,7 +1836,7 @@ func (p *raftProxyDispatcherServer) Assignments(r *AssignmentsRequest, stream Di } streamWrapper := Dispatcher_AssignmentsServerWrapper{ Dispatcher_AssignmentsServer: stream, - ctx: ctx, + ctx: ctx, } return p.local.Assignments(r, streamWrapper) } diff --git a/vendor/github.com/docker/swarmkit/api/logbroker.pb.go b/vendor/github.com/docker/swarmkit/api/logbroker.pb.go index b6231e941cf5a..5456c85816dc3 100644 --- a/vendor/github.com/docker/swarmkit/api/logbroker.pb.go +++ b/vendor/github.com/docker/swarmkit/api/logbroker.pb.go @@ -1335,7 +1335,7 @@ func (p *raftProxyLogsServer) SubscribeLogs(r *SubscribeLogsRequest, stream Logs } streamWrapper := Logs_SubscribeLogsServerWrapper{ Logs_SubscribeLogsServer: stream, - ctx: ctx, + ctx: ctx, } return p.local.SubscribeLogs(r, streamWrapper) } @@ -1458,7 +1458,7 @@ func (p *raftProxyLogBrokerServer) ListenSubscriptions(r *ListenSubscriptionsReq } streamWrapper := LogBroker_ListenSubscriptionsServerWrapper{ LogBroker_ListenSubscriptionsServer: stream, - ctx: ctx, + ctx: ctx, } return p.local.ListenSubscriptions(r, streamWrapper) } @@ -1509,7 +1509,7 @@ func (p *raftProxyLogBrokerServer) PublishLogs(stream LogBroker_PublishLogsServe } streamWrapper := LogBroker_PublishLogsServerWrapper{ LogBroker_PublishLogsServer: stream, - ctx: ctx, + ctx: ctx, } return p.local.PublishLogs(streamWrapper) } diff --git a/vendor/github.com/docker/swarmkit/api/raft.pb.go b/vendor/github.com/docker/swarmkit/api/raft.pb.go index 058b29450c42a..a32a6001b92fd 100644 --- a/vendor/github.com/docker/swarmkit/api/raft.pb.go +++ b/vendor/github.com/docker/swarmkit/api/raft.pb.go @@ -1746,7 +1746,7 @@ func (p *raftProxyRaftServer) StreamRaftMessage(stream Raft_StreamRaftMessageSer } streamWrapper := Raft_StreamRaftMessageServerWrapper{ Raft_StreamRaftMessageServer: stream, - ctx: ctx, + ctx: ctx, } return p.local.StreamRaftMessage(streamWrapper) } diff --git a/vendor/github.com/docker/swarmkit/api/types.pb.go b/vendor/github.com/docker/swarmkit/api/types.pb.go index f41d5e2033735..f5843467cb405 100644 --- a/vendor/github.com/docker/swarmkit/api/types.pb.go +++ b/vendor/github.com/docker/swarmkit/api/types.pb.go @@ -594,7 +594,7 @@ var MaybeEncryptedRecord_Algorithm_name = map[int32]string{ 2: "FERNET_AES_128_CBC", } var MaybeEncryptedRecord_Algorithm_value = map[string]int32{ - "NONE": 0, + "NONE": 0, "SECRETBOX_SALSA20_POLY1305": 1, "FERNET_AES_128_CBC": 2, } diff --git a/vendor/github.com/docker/swarmkit/manager/dispatcher/dispatcher.go b/vendor/github.com/docker/swarmkit/manager/dispatcher/dispatcher.go index 61498064702db..d1db2fdc83c16 100644 --- a/vendor/github.com/docker/swarmkit/manager/dispatcher/dispatcher.go +++ b/vendor/github.com/docker/swarmkit/manager/dispatcher/dispatcher.go @@ -238,7 +238,7 @@ func (d *Dispatcher) Run(ctx context.Context) error { if err != nil { return err } - if err == nil && len(clusters) == 1 { + if len(clusters) == 1 { heartbeatPeriod, err := gogotypes.DurationFromProto(clusters[0].Spec.Dispatcher.HeartbeatPeriod) if err == nil && heartbeatPeriod > 0 { d.config.HeartbeatPeriod = heartbeatPeriod diff --git a/vendor/github.com/docker/swarmkit/manager/drivers/provider.go b/vendor/github.com/docker/swarmkit/manager/drivers/provider.go index 0d9be6119d1a7..97c36fe73d96b 100644 --- a/vendor/github.com/docker/swarmkit/manager/drivers/provider.go +++ b/vendor/github.com/docker/swarmkit/manager/drivers/provider.go @@ -22,7 +22,7 @@ func (m *DriverProvider) NewSecretDriver(driver *api.Driver) (*SecretDriver, err if m.pluginGetter == nil { return nil, fmt.Errorf("plugin getter is nil") } - if driver == nil && driver.Name == "" { + if driver == nil || driver.Name == "" { return nil, fmt.Errorf("driver specification is nil") } // Search for the specified plugin diff --git a/vendor/github.com/docker/swarmkit/manager/manager.go b/vendor/github.com/docker/swarmkit/manager/manager.go index 08b158db441c9..ba7b00566563e 100644 --- a/vendor/github.com/docker/swarmkit/manager/manager.go +++ b/vendor/github.com/docker/swarmkit/manager/manager.go @@ -4,6 +4,7 @@ import ( "context" "crypto/tls" "fmt" + "math" "net" "os" "path/filepath" @@ -758,6 +759,7 @@ func (m *Manager) updateKEK(ctx context.Context, cluster *api.Cluster) error { func(addr string, timeout time.Duration) (net.Conn, error) { return xnet.DialTimeoutLocal(addr, timeout) }), + grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(math.MaxInt32)), ) if err != nil { logger.WithError(err).Error("failed to connect to local manager socket after locking the cluster") @@ -1202,12 +1204,8 @@ func newIngressNetwork() *api.Network { }, DriverConfig: &api.Driver{}, IPAM: &api.IPAMOptions{ - Driver: &api.Driver{}, - Configs: []*api.IPAMConfig{ - { - Subnet: "10.255.0.0/16", - }, - }, + Driver: &api.Driver{}, + Configs: []*api.IPAMConfig{}, }, }, } diff --git a/vendor/github.com/docker/swarmkit/manager/orchestrator/restart/restart.go b/vendor/github.com/docker/swarmkit/manager/orchestrator/restart/restart.go index c034183ba290f..c79d02d98f92a 100644 --- a/vendor/github.com/docker/swarmkit/manager/orchestrator/restart/restart.go +++ b/vendor/github.com/docker/swarmkit/manager/orchestrator/restart/restart.go @@ -508,20 +508,13 @@ func (r *Supervisor) Cancel(taskID string) { <-delay.doneCh } -// CancelAll aborts all pending restarts and waits for any instances of -// StartNow that have already triggered to complete. +// CancelAll aborts all pending restarts func (r *Supervisor) CancelAll() { - var cancelled []delayedStart - r.mu.Lock() for _, delay := range r.delays { delay.cancel() } r.mu.Unlock() - - for _, delay := range cancelled { - <-delay.doneCh - } } // ClearServiceHistory forgets restart history related to a given service ID. diff --git a/vendor/github.com/docker/swarmkit/manager/orchestrator/service.go b/vendor/github.com/docker/swarmkit/manager/orchestrator/service.go index 037e493b3026a..c5d298c516ced 100644 --- a/vendor/github.com/docker/swarmkit/manager/orchestrator/service.go +++ b/vendor/github.com/docker/swarmkit/manager/orchestrator/service.go @@ -47,22 +47,27 @@ func SetServiceTasksRemove(ctx context.Context, s *store.MemoryStore, service *a err = s.Batch(func(batch *store.Batch) error { for _, t := range tasks { err := batch.Update(func(tx store.Tx) error { + // the task may have changed for some reason in the meantime + // since we read it out, so we need to get from the store again + // within the boundaries of a transaction + latestTask := store.GetTask(tx, t.ID) + // time travel is not allowed. if the current desired state is // above the one we're trying to go to we can't go backwards. // we have nothing to do and we should skip to the next task - if t.DesiredState > api.TaskStateRemove { + if latestTask.DesiredState > api.TaskStateRemove { // log a warning, though. we shouln't be trying to rewrite // a state to an earlier state log.G(ctx).Warnf( "cannot update task %v in desired state %v to an earlier desired state %v", - t.ID, t.DesiredState, api.TaskStateRemove, + latestTask.ID, latestTask.DesiredState, api.TaskStateRemove, ) return nil } // update desired state to REMOVE - t.DesiredState = api.TaskStateRemove + latestTask.DesiredState = api.TaskStateRemove - if err := store.UpdateTask(tx, t); err != nil { + if err := store.UpdateTask(tx, latestTask); err != nil { log.G(ctx).WithError(err).Errorf("failed transaction: update task desired state to REMOVE") } return nil diff --git a/vendor/github.com/docker/swarmkit/manager/orchestrator/update/updater.go b/vendor/github.com/docker/swarmkit/manager/orchestrator/update/updater.go index 7c977dba1c4b3..4e6a2cc0bd90e 100644 --- a/vendor/github.com/docker/swarmkit/manager/orchestrator/update/updater.go +++ b/vendor/github.com/docker/swarmkit/manager/orchestrator/update/updater.go @@ -501,7 +501,10 @@ func (u *Updater) removeOldTasks(ctx context.Context, batch *store.Batch, remove return fmt.Errorf("task %s not found while trying to shut it down", original.ID) } if t.DesiredState > api.TaskStateRunning { - return fmt.Errorf("task %s was already shut down when reached by updater", original.ID) + return fmt.Errorf( + "task %s was already shut down when reached by updater (state: %v)", + original.ID, t.DesiredState, + ) } t.DesiredState = api.TaskStateShutdown return store.UpdateTask(tx, t) diff --git a/vendor/github.com/docker/swarmkit/manager/scheduler/nodeinfo.go b/vendor/github.com/docker/swarmkit/manager/scheduler/nodeinfo.go index 3094402a586cb..8f1ad331b5ada 100644 --- a/vendor/github.com/docker/swarmkit/manager/scheduler/nodeinfo.go +++ b/vendor/github.com/docker/swarmkit/manager/scheduler/nodeinfo.go @@ -45,8 +45,8 @@ type NodeInfo struct { func newNodeInfo(n *api.Node, tasks map[string]*api.Task, availableResources api.Resources) NodeInfo { nodeInfo := NodeInfo{ - Node: n, - Tasks: make(map[string]*api.Task), + Node: n, + Tasks: make(map[string]*api.Task), ActiveTasksCountByService: make(map[string]int), AvailableResources: availableResources.Copy(), usedHostPorts: make(map[hostPortSpec]struct{}), diff --git a/vendor/github.com/docker/swarmkit/node/node.go b/vendor/github.com/docker/swarmkit/node/node.go index 92bd74884e9f2..1d3e6c4f50ab0 100644 --- a/vendor/github.com/docker/swarmkit/node/node.go +++ b/vendor/github.com/docker/swarmkit/node/node.go @@ -6,6 +6,7 @@ import ( "crypto/tls" "encoding/json" "io/ioutil" + "math" "net" "os" "path/filepath" @@ -896,6 +897,7 @@ func (n *Node) initManagerConnection(ctx context.Context, ready chan<- struct{}) opts := []grpc.DialOption{ grpc.WithUnaryInterceptor(grpc_prometheus.UnaryClientInterceptor), grpc.WithStreamInterceptor(grpc_prometheus.StreamClientInterceptor), + grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(math.MaxInt32)), } insecureCreds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true}) opts = append(opts, grpc.WithTransportCredentials(insecureCreds))