From d3fcdb124f8219926249d01f2232d12320e7a9b3 Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Mon, 27 Oct 2025 15:47:37 +0100 Subject: [PATCH] Simplify rollout planner --- .../machinedeployment_rollout_planner_test.go | 114 +++++++++++------- ...scale down to 6, random(0).test.log.golden | 31 +++-- ...ailable 1, scale down to 6.test.log.golden | 43 ++++--- ...axunavailable 0, random(0).test.log.golden | 20 ++- ...xsurge 1, maxunavailable 0.test.log.golden | 23 +++- ...axunavailable 0, random(0).test.log.golden | 41 ++++++- ...surge 10, maxunavailable 0.test.log.golden | 23 +++- 7 files changed, 207 insertions(+), 88 deletions(-) diff --git a/internal/controllers/machinedeployment/machinedeployment_rollout_planner_test.go b/internal/controllers/machinedeployment/machinedeployment_rollout_planner_test.go index e7ddb77d95e8..874d4bcd6e2f 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rollout_planner_test.go +++ b/internal/controllers/machinedeployment/machinedeployment_rollout_planner_test.go @@ -21,6 +21,7 @@ import ( "fmt" "math/rand" "os" + "slices" "sort" "strconv" "strings" @@ -505,13 +506,13 @@ func machineSetControllerMutator(log *fileLogger, ms *clusterv1.MachineSet, scop // TODO(in-place): when implementing in-place changes in the MachineSetController code make sure to: // - detect if there are replicas still pending AcknowledgeMove first, including also handling cleanup of the pendingAcknowledgeMoveAnnotationName on machines // - when deleting or moving - // - first move if possible, then delete - // - move machines should be capped to avoid unnecessary in-place upgrades (in case of scale down in the middle of rollouts); remaining part should be deleted - // - do not move machines pending a move Acknowledge, with an in-place upgrade in progress, deleted or marked for deletion, unhealthy + // - move or delete are mutually exclusive + // - do not move machines pending a move Acknowledge, with an in-place upgrade in progress, deleted or marked for deletion (TBD if to be remediated) + // - when deleting, machines updating in place should have higher priority than other machines (first get rid of not at the desired state/try to avoid unnecessary work; also those machines are unavailable). // Sort machines to ensure stable results of move/delete operations during tests. // TODO(in-place): this should not be implemented in production code for the MachineSet controller - sortMachineSetMachines(scope.machineSetMachines[ms.Name]) + sortMachineSetMachinesByName(scope.machineSetMachines[ms.Name]) // Removing updatingInPlaceAnnotation from machines after pendingAcknowledgeMove is gone in a previous reconcile (so inPlaceUpdating lasts one reconcile more) // NOTE: This is a testing workaround to simulate inPlaceUpdating being completed; it is implemented in the fake MachineSet controller @@ -620,15 +621,6 @@ func machineSetControllerMutator(log *fileLogger, ms *clusterv1.MachineSet, scop return errors.Errorf("[MS controller] - PANIC! %s is set to send replicas to %s, which does not exists", ms.Name, targetMSName) } - // Limit the number of machines to be moved to avoid to exceed the final number of replicas for the target MS. - // TODO(in-place): consider using the DesiredReplicasAnnotation which is propagated together with the other decisions of the rollout planner - // another option to consider is to shift this check on the newMS (so we perform move, but we don't start the in-place upgrade). - machinesToMove := min(machinesToDeleteOrMove, ptr.Deref(scope.machineDeployment.Spec.Replicas, 0)-ptr.Deref(targetMS.Spec.Replicas, 0)) - if machinesToMove != machinesToDeleteOrMove { - log.Logf("[MS controller] - Move capped to %d replicas to avoid unnecessary in-place upgrades", machinesToMove) - } - machinesToDeleteOrMove -= machinesToMove - validSourceMSs := targetMS.Annotations[clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation] sourcesSet := sets.Set[string]{} sourcesSet.Insert(strings.Split(validSourceMSs, ",")...) @@ -636,11 +628,14 @@ func machineSetControllerMutator(log *fileLogger, ms *clusterv1.MachineSet, scop return errors.Errorf("[MS controller] - PANIC! %s is set to send replicas to %s, but %[2]s only accepts machines from %s", ms.Name, targetMS.Name, validSourceMSs) } + // Always move all the machine that can be moved. + // In case the target machine set will end up with more machines than its target replica number, it will take care of this. + machinesToMove := machinesToDeleteOrMove machinesMoved := []string{} machinesSetMachines := []*clusterv1.Machine{} for i, m := range scope.machineSetMachines[ms.Name] { - // Make sure we are not moving machines still pending AcknowledgeMove - if notAcknowledgeMoveReplicas.Has(m.Name) { + // Make sure we are not moving machines still updating in place (this includes also machines still pending AcknowledgeMove). + if _, ok := m.Annotations[clusterv1.UpdateInProgressAnnotation]; ok { machinesSetMachines = append(machinesSetMachines, m) continue } @@ -672,31 +667,39 @@ func machineSetControllerMutator(log *fileLogger, ms *clusterv1.MachineSet, scop // Sort machines of the target MS to ensure consistent reporting during tests. // Note: this is required because a machine can be moved to a target MachineSet that has been already reconciled before the source MachineSet (it won't sort machine by itself until the next reconcile). - sortMachineSetMachines(scope.machineSetMachines[targetMS.Name]) + sortMachineSetMachinesByName(scope.machineSetMachines[targetMS.Name]) } - } - } - - if machinesToDeleteOrMove > 0 { - machinesToDelete := machinesToDeleteOrMove - machinesDeleted := []string{} - machinesSetMachines := []*clusterv1.Machine{} - for i, m := range scope.machineSetMachines[ms.Name] { - // Prevent deletion of machines not yet acknowledged after a move operation. - // Note: as soon as an in-place upgrade is started, CAPI Should always try to complete it - // before taking further actions on the same machine. - if notAcknowledgeMoveReplicas.Has(m.Name) { - machinesSetMachines = append(machinesSetMachines, m) - continue - } - if int32(len(machinesDeleted)) >= machinesToDelete { - machinesSetMachines = append(machinesSetMachines, scope.machineSetMachines[ms.Name][i:]...) - break + } else { + machinesToDelete := machinesToDeleteOrMove + machinesDeleted := []string{} + + // Note: in the test code exceeding machines are deleted in predictable order, so it is easier to write test case and validate rollout sequences. + // e.g. if a ms has m1,m2,m3 created in this order, m1 will be deleted first, then m2 and finally m3. + // note: In case the system has to delete some replicas, and those replicas are still updating in place, they gets deleted first. + // + // This prevents the system to perform unnecessary in-place updates. e.g. + // - In place rollout of MD with 3 Replicas, maxSurge 1, MaxUnavailable 0 + // - First create m4 to create a buffer for doing in place + // - Move old machines (m1, m2, m3) + // - Resulting new MS at this point has 4 replicas m1, m2, m3 (updating in place) and (m4). + // - The system scales down MS, and the system does this getting rid of m3 - the last replica that started in place. + machinesSetMachinesSortedByDeletePriority := sortMachineSetMachinesByDeletionPriorityAndName(scope.machineSetMachines[ms.Name]) + machinesSetMachines := []*clusterv1.Machine{} + for i, m := range machinesSetMachinesSortedByDeletePriority { + if int32(len(machinesDeleted)) >= machinesToDelete { + machinesSetMachines = append(machinesSetMachines, machinesSetMachinesSortedByDeletePriority[i:]...) + break + } + machinesDeleted = append(machinesDeleted, m.Name) } - machinesDeleted = append(machinesDeleted, m.Name) + scope.machineSetMachines[ms.Name] = machinesSetMachines + + // Sort machines of the target MS to ensure consistent reporting during tests. + // Note: this is required specifically by the test code because a machine can be moved to a target MachineSet that has been already + // reconciled before the source MachineSet (it won't sort machine by itself until the next reconcile), and test machinery assumes machine are sorted. + sortMachineSetMachinesByName(scope.machineSetMachines[ms.Name]) + log.Logf("[MS controller] - %s scale down to %d/%[2]d replicas (%s deleted)", ms.Name, ptr.Deref(ms.Spec.Replicas, 0), strings.Join(machinesDeleted, ",")) } - scope.machineSetMachines[ms.Name] = machinesSetMachines - log.Logf("[MS controller] - %s scale down to %d/%[2]d replicas (%s deleted)", ms.Name, ptr.Deref(ms.Spec.Replicas, 0), strings.Join(machinesDeleted, ",")) } // Update counters @@ -1041,12 +1044,41 @@ func (l *fileLogger) WriteLogAndCompareWithGoldenFile() (string, string, error) return current, golden, nil } -func sortMachineSetMachines(machines []*clusterv1.Machine) { +func sortMachineSetMachinesByName(machines []*clusterv1.Machine) { sort.Slice(machines, func(i, j int) bool { - iIndex, _ := strconv.Atoi(strings.TrimPrefix(machines[i].Name, "m")) - jiIndex, _ := strconv.Atoi(strings.TrimPrefix(machines[j].Name, "m")) - return iIndex < jiIndex + iNameIndex, _ := strconv.Atoi(strings.TrimPrefix(machines[i].Name, "m")) + jNameIndex, _ := strconv.Atoi(strings.TrimPrefix(machines[j].Name, "m")) + return iNameIndex < jNameIndex + }) +} + +func sortMachineSetMachinesByDeletionPriorityAndName(machines []*clusterv1.Machine) []*clusterv1.Machine { + machinesSetMachinesSortedByDeletePriority := slices.Clone(machines) + + // Note: machines updating in place must be deleted first. + // in case of ties: + // - if both machines are updating in place, delete first the machine with the highest machine NameIndex (e.g. between m3 and m4, pick m4, aka the last machine being moved) + // - if both machines are not updating in place, delete first the machine with the lowest machine NameIndex (e.g. between m3 and m4, pick m3) + sort.Slice(machinesSetMachinesSortedByDeletePriority, func(i, j int) bool { + iPriority := 100 + if _, ok := machinesSetMachinesSortedByDeletePriority[i].Annotations[clusterv1.UpdateInProgressAnnotation]; ok { + iPriority = 1 + } + jPriority := 100 + if _, ok := machinesSetMachinesSortedByDeletePriority[j].Annotations[clusterv1.UpdateInProgressAnnotation]; ok { + jPriority = 1 + } + if iPriority == jPriority { + iNameIndex, _ := strconv.Atoi(strings.TrimPrefix(machinesSetMachinesSortedByDeletePriority[i].Name, "m")) + jNameIndex, _ := strconv.Atoi(strings.TrimPrefix(machinesSetMachinesSortedByDeletePriority[j].Name, "m")) + if iPriority == 1 { + return iNameIndex > jNameIndex + } + return iNameIndex < jNameIndex + } + return iPriority < jPriority }) + return machinesSetMachinesSortedByDeletePriority } // default task order ensure the controllers are run in a consistent and predictable way: md, ms1, ms2 and so on. diff --git a/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 12 replicas, maxsurge 3, maxunavailable 1, scale down to 6, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 12 replicas, maxsurge 3, maxunavailable 1, scale down to 6, random(0).test.log.golden index 13d89916a941..dbd07e2d86c9 100644 --- a/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 12 replicas, maxsurge 3, maxunavailable 1, scale down to 6, random(0).test.log.golden +++ b/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 12 replicas, maxsurge 3, maxunavailable 1, scale down to 6, random(0).test.log.golden @@ -18,20 +18,19 @@ - ms2, 3/3 replicas (m1,m2,m3 <= ms1) [Toleration] tolerate maxSurge breach [MS controller] Iteration 2, Reconcile ms1, 9/2 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12 => ms2) -[MS controller] - Move capped to 3 replicas to avoid unnecessary in-place upgrades -[MS controller] - ms1 scale down to 6/2 replicas (m4,m5,m6 moved to ms2) -[MS controller] - ms1 scale down to 2/2 replicas (m7,m8,m9,m10 deleted) +[MS controller] - ms1 scale down to 2/2 replicas (m4,m5,m6,m7,m8,m9,m10 moved to ms2) [MS controller] Iteration 2, Reconcile ms1, 2/2 replicas (m11,m12 => ms2) [MD controller] Iteration 3, Reconcile md [MD controller] - Input to rollout planner md, 12/6 replicas - ms1, 2/2 replicas (m11,m12 => ms2) - - ms2, 3/3 replicas (m1,m2,m3,m4🟠🟡,m5🟠🟡,m6🟠🟡 <= ms1) + - ms2, 3/3 replicas (m1,m2,m3,m4🟠🟡,m5🟠🟡,m6🟠🟡,m7🟠🟡,m8🟠🟡,m9🟠🟡,m10🟠🟡 <= ms1) [MD controller] - Result of rollout planner md, 5/6 replicas - ms1, 2/2 replicas (m11,m12 => ms2) - - ms2, 3/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡 <= ms1) -[MS controller] Iteration 3, Reconcile ms2, 3/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡 <= ms1) + - ms2, 3/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡,m7🟡,m8🟡,m9🟡,m10🟡 <= ms1) +[MS controller] Iteration 3, Reconcile ms2, 3/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡,m7🟡,m8🟡,m9🟡,m10🟡 <= ms1) +[MS controller] - ms2 scale down to 6/6 replicas (m10,m9,m8,m7 deleted) [MD controller] Iteration 4, Reconcile md [MD controller] - Input to rollout planner md, 5/6 replicas @@ -75,12 +74,24 @@ - ms1, 2/0 replicas (m11,m12 => ms2) - ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6 <= ms1) [MS controller] Iteration 8, Reconcile ms1, 2/0 replicas (m11,m12 => ms2) -[MS controller] - Move capped to 0 replicas to avoid unnecessary in-place upgrades -[MS controller] - ms1 scale down to 2/0 replicas ( moved to ms2) -[MS controller] - ms1 scale down to 0/0 replicas (m11,m12 deleted) -[MS controller] Iteration 9, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6 <= ms1) +[MS controller] - ms1 scale down to 0/0 replicas (m11,m12 moved to ms2) +[MS controller] Iteration 9, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6,m11🟠🟡,m12🟠🟡 <= ms1) +[MS controller] - Replicas m11,m12 moved from an old MachineSet still pending acknowledge from machine deployment md [MS controller] Iteration 9, Reconcile ms1, 0/0 replicas ( => ms2) [MD controller] Iteration 9, Reconcile md +[MD controller] - Input to rollout planner + md, 8/6 replicas + - ms1, 0/0 replicas ( => ms2) + - ms2, 8/6 replicas (m1,m2,m3,m4,m5,m6,m11🟠🟡,m12🟠🟡 <= ms1) +[MD controller] - Result of rollout planner + md, 8/6 replicas + - ms1, 0/0 replicas ( => ms2) + - ms2, 8/6 replicas (m1,m2,m3,m4,m5,m6,m11🟡,m12🟡 <= ms1) +[MS controller] Iteration 10, Reconcile ms2, 8/6 replicas (m1,m2,m3,m4,m5,m6,m11🟡,m12🟡 <= ms1) +[MS controller] - ms2 scale down to 6/6 replicas (m12,m11 deleted) +[MS controller] Iteration 10, Reconcile ms1, 0/0 replicas ( => ms2) +[MS controller] Iteration 11, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6 <= ms1) +[MD controller] Iteration 11, Reconcile md [MD controller] - Input to rollout planner md, 8/6 replicas - ms1, 0/0 replicas ( => ms2) diff --git a/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 12 replicas, maxsurge 3, maxunavailable 1, scale down to 6.test.log.golden b/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 12 replicas, maxsurge 3, maxunavailable 1, scale down to 6.test.log.golden index e941386f8a89..b33fdd7490e0 100644 --- a/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 12 replicas, maxsurge 3, maxunavailable 1, scale down to 6.test.log.golden +++ b/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 12 replicas, maxsurge 3, maxunavailable 1, scale down to 6.test.log.golden @@ -16,25 +16,25 @@ - ms2, 3/3 replicas (m1,m2,m3 <= ms1) [Toleration] tolerate maxSurge breach [MS controller] Iteration 1, Reconcile ms1, 9/2 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12 => ms2) -[MS controller] - Move capped to 3 replicas to avoid unnecessary in-place upgrades -[MS controller] - ms1 scale down to 6/2 replicas (m4,m5,m6 moved to ms2) -[MS controller] - ms1 scale down to 2/2 replicas (m7,m8,m9,m10 deleted) -[MS controller] Iteration 1, Reconcile ms2, 3/3 replicas (m1,m2,m3,m4🟠🟡,m5🟠🟡,m6🟠🟡 <= ms1) -[MS controller] - Replicas m4,m5,m6 moved from an old MachineSet still pending acknowledge from machine deployment md +[MS controller] - ms1 scale down to 2/2 replicas (m4,m5,m6,m7,m8,m9,m10 moved to ms2) +[MS controller] Iteration 1, Reconcile ms2, 3/3 replicas (m1,m2,m3,m4🟠🟡,m5🟠🟡,m6🟠🟡,m7🟠🟡,m8🟠🟡,m9🟠🟡,m10🟠🟡 <= ms1) +[MS controller] - Replicas m10,m4,m5,m6,m7,m8,m9 moved from an old MachineSet still pending acknowledge from machine deployment md [MD controller] Iteration 2, Reconcile md [MD controller] - Input to rollout planner md, 12/6 replicas - ms1, 2/2 replicas (m11,m12 => ms2) - - ms2, 6/3 replicas (m1,m2,m3,m4🟠🟡,m5🟠🟡,m6🟠🟡 <= ms1) + - ms2, 10/3 replicas (m1,m2,m3,m4🟠🟡,m5🟠🟡,m6🟠🟡,m7🟠🟡,m8🟠🟡,m9🟠🟡,m10🟠🟡 <= ms1) [MD controller] - Result of rollout planner - md, 8/6 replicas + md, 12/6 replicas - ms1, 2/2 replicas (m11,m12 => ms2) - - ms2, 6/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡 <= ms1) + - ms2, 10/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡,m7🟡,m8🟡,m9🟡,m10🟡 <= ms1) +[Toleration] tolerate maxSurge breach [MS controller] Iteration 2, Reconcile ms1, 2/2 replicas (m11,m12 => ms2) -[MS controller] Iteration 2, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡 <= ms1) +[MS controller] Iteration 2, Reconcile ms2, 10/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡,m7🟡,m8🟡,m9🟡,m10🟡 <= ms1) +[MS controller] - ms2 scale down to 6/6 replicas (m10,m9,m8,m7 deleted) [MD controller] Iteration 3, Reconcile md [MD controller] - Input to rollout planner - md, 8/6 replicas + md, 12/6 replicas - ms1, 2/2 replicas (m11,m12 => ms2) - ms2, 6/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡 <= ms1) [MD controller] - Result of rollout planner @@ -54,11 +54,22 @@ - ms1, 2/0 replicas (m11,m12 => ms2) - ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6 <= ms1) [MS controller] Iteration 4, Reconcile ms1, 2/0 replicas (m11,m12 => ms2) -[MS controller] - Move capped to 0 replicas to avoid unnecessary in-place upgrades -[MS controller] - ms1 scale down to 2/0 replicas ( moved to ms2) -[MS controller] - ms1 scale down to 0/0 replicas (m11,m12 deleted) -[MS controller] Iteration 4, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6 <= ms1) +[MS controller] - ms1 scale down to 0/0 replicas (m11,m12 moved to ms2) +[MS controller] Iteration 4, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6,m11🟠🟡,m12🟠🟡 <= ms1) +[MS controller] - Replicas m11,m12 moved from an old MachineSet still pending acknowledge from machine deployment md [MD controller] Iteration 5, Reconcile md +[MD controller] - Input to rollout planner + md, 8/6 replicas + - ms1, 0/0 replicas ( => ms2) + - ms2, 8/6 replicas (m1,m2,m3,m4,m5,m6,m11🟠🟡,m12🟠🟡 <= ms1) +[MD controller] - Result of rollout planner + md, 8/6 replicas + - ms1, 0/0 replicas ( => ms2) + - ms2, 8/6 replicas (m1,m2,m3,m4,m5,m6,m11🟡,m12🟡 <= ms1) +[MS controller] Iteration 5, Reconcile ms1, 0/0 replicas ( => ms2) +[MS controller] Iteration 5, Reconcile ms2, 8/6 replicas (m1,m2,m3,m4,m5,m6,m11🟡,m12🟡 <= ms1) +[MS controller] - ms2 scale down to 6/6 replicas (m12,m11 deleted) +[MD controller] Iteration 6, Reconcile md [MD controller] - Input to rollout planner md, 8/6 replicas - ms1, 0/0 replicas ( => ms2) @@ -67,8 +78,8 @@ md, 6/6 replicas - ms1, 0/0 replicas ( => ms2) - ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6 <= ms1) -[MS controller] Iteration 5, Reconcile ms1, 0/0 replicas ( => ms2) -[MS controller] Iteration 5, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6 <= ms1) +[MS controller] Iteration 6, Reconcile ms1, 0/0 replicas ( => ms2) +[MS controller] Iteration 6, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6 <= ms1) [Test] Final state md, 6/6 replicas - ms1, 0/0 replicas ( => ms2) diff --git a/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 3 replicas, maxsurge 1, maxunavailable 0, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 3 replicas, maxsurge 1, maxunavailable 0, random(0).test.log.golden index 4af0a79f8235..77ab587ec391 100644 --- a/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 3 replicas, maxsurge 1, maxunavailable 0, random(0).test.log.golden +++ b/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 3 replicas, maxsurge 1, maxunavailable 0, random(0).test.log.golden @@ -128,18 +128,28 @@ [MS controller] Iteration 16, Reconcile ms2, 3/3 replicas (m1,m2,m4 <= ms1) [MS controller] Iteration 16, Reconcile ms2, 3/3 replicas (m1,m2,m4 <= ms1) [MS controller] Iteration 16, Reconcile ms1, 1/0 replicas (m3 => ms2) -[MS controller] - Move capped to 0 replicas to avoid unnecessary in-place upgrades -[MS controller] - ms1 scale down to 1/0 replicas ( moved to ms2) -[MS controller] - ms1 scale down to 0/0 replicas (m3 deleted) +[MS controller] - ms1 scale down to 0/0 replicas (m3 moved to ms2) [MD controller] Iteration 16, Reconcile md [MD controller] - Input to rollout planner md, 4/3 replicas - ms1, 0/0 replicas ( => ms2) - - ms2, 3/3 replicas (m1,m2,m4 <= ms1) + - ms2, 3/3 replicas (m1,m2,m3🟠🟡,m4 <= ms1) [MD controller] - Result of rollout planner md, 3/3 replicas - ms1, 0/0 replicas ( => ms2) - - ms2, 3/3 replicas (m1,m2,m4 <= ms1) + - ms2, 3/3 replicas (m1,m2,m3🟡,m4 <= ms1) +[MD controller] Iteration 17, Reconcile md +[MD controller] - Input to rollout planner + md, 3/3 replicas + - ms1, 0/0 replicas ( => ms2) + - ms2, 3/3 replicas (m1,m2,m3🟡,m4 <= ms1) +[MD controller] - Result of rollout planner + md, 3/3 replicas + - ms1, 0/0 replicas ( => ms2) + - ms2, 3/3 replicas (m1,m2,m3🟡,m4 <= ms1) +[MS controller] Iteration 17, Reconcile ms2, 3/3 replicas (m1,m2,m3🟡,m4 <= ms1) +[MS controller] - ms2 scale down to 3/3 replicas (m3 deleted) +[MS controller] Iteration 17, Reconcile ms1, 0/0 replicas ( => ms2) [Test] Final state md, 3/3 replicas - ms1, 0/0 replicas ( => ms2) diff --git a/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 3 replicas, maxsurge 1, maxunavailable 0.test.log.golden b/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 3 replicas, maxsurge 1, maxunavailable 0.test.log.golden index 0adb62f62a95..2509e2674f36 100644 --- a/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 3 replicas, maxsurge 1, maxunavailable 0.test.log.golden +++ b/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 3 replicas, maxsurge 1, maxunavailable 0.test.log.golden @@ -97,11 +97,22 @@ - ms1, 1/0 replicas (m3 => ms2) - ms2, 3/3 replicas (m1,m2,m4 <= ms1) [MS controller] Iteration 8, Reconcile ms1, 1/0 replicas (m3 => ms2) -[MS controller] - Move capped to 0 replicas to avoid unnecessary in-place upgrades -[MS controller] - ms1 scale down to 1/0 replicas ( moved to ms2) -[MS controller] - ms1 scale down to 0/0 replicas (m3 deleted) -[MS controller] Iteration 8, Reconcile ms2, 3/3 replicas (m1,m2,m4 <= ms1) +[MS controller] - ms1 scale down to 0/0 replicas (m3 moved to ms2) +[MS controller] Iteration 8, Reconcile ms2, 3/3 replicas (m1,m2,m3🟠🟡,m4 <= ms1) +[MS controller] - Replicas m3 moved from an old MachineSet still pending acknowledge from machine deployment md [MD controller] Iteration 9, Reconcile md +[MD controller] - Input to rollout planner + md, 4/3 replicas + - ms1, 0/0 replicas ( => ms2) + - ms2, 4/3 replicas (m1,m2,m3🟠🟡,m4 <= ms1) +[MD controller] - Result of rollout planner + md, 4/3 replicas + - ms1, 0/0 replicas ( => ms2) + - ms2, 4/3 replicas (m1,m2,m3🟡,m4 <= ms1) +[MS controller] Iteration 9, Reconcile ms1, 0/0 replicas ( => ms2) +[MS controller] Iteration 9, Reconcile ms2, 4/3 replicas (m1,m2,m3🟡,m4 <= ms1) +[MS controller] - ms2 scale down to 3/3 replicas (m3 deleted) +[MD controller] Iteration 10, Reconcile md [MD controller] - Input to rollout planner md, 4/3 replicas - ms1, 0/0 replicas ( => ms2) @@ -110,8 +121,8 @@ md, 3/3 replicas - ms1, 0/0 replicas ( => ms2) - ms2, 3/3 replicas (m1,m2,m4 <= ms1) -[MS controller] Iteration 9, Reconcile ms1, 0/0 replicas ( => ms2) -[MS controller] Iteration 9, Reconcile ms2, 3/3 replicas (m1,m2,m4 <= ms1) +[MS controller] Iteration 10, Reconcile ms1, 0/0 replicas ( => ms2) +[MS controller] Iteration 10, Reconcile ms2, 3/3 replicas (m1,m2,m4 <= ms1) [Test] Final state md, 3/3 replicas - ms1, 0/0 replicas ( => ms2) diff --git a/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 6 replicas, maxsurge 10, maxunavailable 0, random(0).test.log.golden b/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 6 replicas, maxsurge 10, maxunavailable 0, random(0).test.log.golden index f9cd58223c96..62a796248b47 100644 --- a/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 6 replicas, maxsurge 10, maxunavailable 0, random(0).test.log.golden +++ b/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 6 replicas, maxsurge 10, maxunavailable 0, random(0).test.log.golden @@ -246,11 +246,42 @@ - ms1, 1/0 replicas (m6 => ms2) - ms2, 6/6 replicas (m1,m2,m3,m4,m5,m7 <= ms1) [MS controller] Iteration 27, Reconcile ms1, 1/0 replicas (m6 => ms2) -[MS controller] - Move capped to 0 replicas to avoid unnecessary in-place upgrades -[MS controller] - ms1 scale down to 1/0 replicas ( moved to ms2) -[MS controller] - ms1 scale down to 0/0 replicas (m6 deleted) -[MS controller] Iteration 27, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m7 <= ms1) +[MS controller] - ms1 scale down to 0/0 replicas (m6 moved to ms2) +[MS controller] Iteration 27, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6🟠🟡,m7 <= ms1) +[MS controller] - Replicas m6 moved from an old MachineSet still pending acknowledge from machine deployment md [MD controller] Iteration 27, Reconcile md +[MD controller] - Input to rollout planner + md, 7/6 replicas + - ms1, 0/0 replicas ( => ms2) + - ms2, 7/6 replicas (m1,m2,m3,m4,m5,m6🟠🟡,m7 <= ms1) +[MD controller] - Result of rollout planner + md, 7/6 replicas + - ms1, 0/0 replicas ( => ms2) + - ms2, 7/6 replicas (m1,m2,m3,m4,m5,m6🟡,m7 <= ms1) +[MD controller] Iteration 28, Reconcile md +[MD controller] - Input to rollout planner + md, 7/6 replicas + - ms1, 0/0 replicas ( => ms2) + - ms2, 7/6 replicas (m1,m2,m3,m4,m5,m6🟡,m7 <= ms1) +[MD controller] - Result of rollout planner + md, 7/6 replicas + - ms1, 0/0 replicas ( => ms2) + - ms2, 7/6 replicas (m1,m2,m3,m4,m5,m6🟡,m7 <= ms1) +[MS controller] Iteration 28, Reconcile ms1, 0/0 replicas ( => ms2) +[MD controller] Iteration 29, Reconcile md +[MD controller] - Input to rollout planner + md, 7/6 replicas + - ms1, 0/0 replicas ( => ms2) + - ms2, 7/6 replicas (m1,m2,m3,m4,m5,m6🟡,m7 <= ms1) +[MD controller] - Result of rollout planner + md, 7/6 replicas + - ms1, 0/0 replicas ( => ms2) + - ms2, 7/6 replicas (m1,m2,m3,m4,m5,m6🟡,m7 <= ms1) +[MS controller] Iteration 29, Reconcile ms1, 0/0 replicas ( => ms2) +[MS controller] Iteration 30, Reconcile ms1, 0/0 replicas ( => ms2) +[MS controller] Iteration 30, Reconcile ms2, 7/6 replicas (m1,m2,m3,m4,m5,m6🟡,m7 <= ms1) +[MS controller] - ms2 scale down to 6/6 replicas (m6 deleted) +[MD controller] Iteration 31, Reconcile md [MD controller] - Input to rollout planner md, 7/6 replicas - ms1, 0/0 replicas ( => ms2) @@ -259,6 +290,8 @@ md, 6/6 replicas - ms1, 0/0 replicas ( => ms2) - ms2, 6/6 replicas (m1,m2,m3,m4,m5,m7 <= ms1) +[MS controller] Iteration 31, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m7 <= ms1) +[MS controller] Iteration 31, Reconcile ms1, 0/0 replicas ( => ms2) [Test] Final state md, 6/6 replicas - ms1, 0/0 replicas ( => ms2) diff --git a/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 6 replicas, maxsurge 10, maxunavailable 0.test.log.golden b/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 6 replicas, maxsurge 10, maxunavailable 0.test.log.golden index 0e49854314c6..de6a4c61b4ff 100644 --- a/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 6 replicas, maxsurge 10, maxunavailable 0.test.log.golden +++ b/internal/controllers/machinedeployment/testdata/rollingupdate/in-place rollout, 6 replicas, maxsurge 10, maxunavailable 0.test.log.golden @@ -205,11 +205,22 @@ - ms1, 1/0 replicas (m6 => ms2) - ms2, 6/6 replicas (m1,m2,m3,m4,m5,m7 <= ms1) [MS controller] Iteration 17, Reconcile ms1, 1/0 replicas (m6 => ms2) -[MS controller] - Move capped to 0 replicas to avoid unnecessary in-place upgrades -[MS controller] - ms1 scale down to 1/0 replicas ( moved to ms2) -[MS controller] - ms1 scale down to 0/0 replicas (m6 deleted) -[MS controller] Iteration 17, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m7 <= ms1) +[MS controller] - ms1 scale down to 0/0 replicas (m6 moved to ms2) +[MS controller] Iteration 17, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6🟠🟡,m7 <= ms1) +[MS controller] - Replicas m6 moved from an old MachineSet still pending acknowledge from machine deployment md [MD controller] Iteration 18, Reconcile md +[MD controller] - Input to rollout planner + md, 7/6 replicas + - ms1, 0/0 replicas ( => ms2) + - ms2, 7/6 replicas (m1,m2,m3,m4,m5,m6🟠🟡,m7 <= ms1) +[MD controller] - Result of rollout planner + md, 7/6 replicas + - ms1, 0/0 replicas ( => ms2) + - ms2, 7/6 replicas (m1,m2,m3,m4,m5,m6🟡,m7 <= ms1) +[MS controller] Iteration 18, Reconcile ms1, 0/0 replicas ( => ms2) +[MS controller] Iteration 18, Reconcile ms2, 7/6 replicas (m1,m2,m3,m4,m5,m6🟡,m7 <= ms1) +[MS controller] - ms2 scale down to 6/6 replicas (m6 deleted) +[MD controller] Iteration 19, Reconcile md [MD controller] - Input to rollout planner md, 7/6 replicas - ms1, 0/0 replicas ( => ms2) @@ -218,8 +229,8 @@ md, 6/6 replicas - ms1, 0/0 replicas ( => ms2) - ms2, 6/6 replicas (m1,m2,m3,m4,m5,m7 <= ms1) -[MS controller] Iteration 18, Reconcile ms1, 0/0 replicas ( => ms2) -[MS controller] Iteration 18, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m7 <= ms1) +[MS controller] Iteration 19, Reconcile ms1, 0/0 replicas ( => ms2) +[MS controller] Iteration 19, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m7 <= ms1) [Test] Final state md, 6/6 replicas - ms1, 0/0 replicas ( => ms2)