Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"fmt"
"math/rand"
"os"
"slices"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -505,13 +506,13 @@ func machineSetControllerMutator(log *fileLogger, ms *clusterv1.MachineSet, scop
// TODO(in-place): when implementing in-place changes in the MachineSetController code make sure to:
// - detect if there are replicas still pending AcknowledgeMove first, including also handling cleanup of the pendingAcknowledgeMoveAnnotationName on machines
// - when deleting or moving
// - first move if possible, then delete
// - move machines should be capped to avoid unnecessary in-place upgrades (in case of scale down in the middle of rollouts); remaining part should be deleted
// - do not move machines pending a move Acknowledge, with an in-place upgrade in progress, deleted or marked for deletion, unhealthy
// - move or delete are mutually exclusive
// - do not move machines pending a move Acknowledge, with an in-place upgrade in progress, deleted or marked for deletion (TBD if to be remediated)
// - when deleting, machines updating in place should have higher priority than other machines (first get rid of machines not at the desired state / try to avoid unnecessary work; also, those machines are unavailable).

// Sort machines to ensure stable results of move/delete operations during tests.
// TODO(in-place): this should not be implemented in production code for the MachineSet controller
sortMachineSetMachines(scope.machineSetMachines[ms.Name])
sortMachineSetMachinesByName(scope.machineSetMachines[ms.Name])

// Removing updatingInPlaceAnnotation from machines after pendingAcknowledgeMove is gone in a previous reconcile (so inPlaceUpdating lasts one reconcile more)
// NOTE: This is a testing workaround to simulate inPlaceUpdating being completed; it is implemented in the fake MachineSet controller
Expand Down Expand Up @@ -620,27 +621,21 @@ func machineSetControllerMutator(log *fileLogger, ms *clusterv1.MachineSet, scop
return errors.Errorf("[MS controller] - PANIC! %s is set to send replicas to %s, which does not exists", ms.Name, targetMSName)
}

// Limit the number of machines to be moved to avoid to exceed the final number of replicas for the target MS.
// TODO(in-place): consider using the DesiredReplicasAnnotation which is propagated together with the other decisions of the rollout planner
// another option to consider is to shift this check on the newMS (so we perform move, but we don't start the in-place upgrade).
machinesToMove := min(machinesToDeleteOrMove, ptr.Deref(scope.machineDeployment.Spec.Replicas, 0)-ptr.Deref(targetMS.Spec.Replicas, 0))
Comment on lines -623 to -626
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed during review, this would have been racy when implemented in the prod code for the oldMS reconciliation.

So now we are moving all the machines, and deferring to the newMS to get rid of exceeding machines.
This leads to the ideal results - perform the minimal number of in place upgrades/avoid unnecessary in place updates - under the assumption that machines upgrading in place have the highest deletion priority.

Notes about how to change the prod code have been changed accordingly

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, thx!

if machinesToMove != machinesToDeleteOrMove {
log.Logf("[MS controller] - Move capped to %d replicas to avoid unnecessary in-place upgrades", machinesToMove)
}
machinesToDeleteOrMove -= machinesToMove

validSourceMSs := targetMS.Annotations[clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation]
sourcesSet := sets.Set[string]{}
sourcesSet.Insert(strings.Split(validSourceMSs, ",")...)
if !sourcesSet.Has(ms.Name) {
return errors.Errorf("[MS controller] - PANIC! %s is set to send replicas to %s, but %[2]s only accepts machines from %s", ms.Name, targetMS.Name, validSourceMSs)
}

// Always move all the machines that can be moved.
// In case the target machine set will end up with more machines than its target replica number, it will take care of this.
machinesToMove := machinesToDeleteOrMove
machinesMoved := []string{}
machinesSetMachines := []*clusterv1.Machine{}
for i, m := range scope.machineSetMachines[ms.Name] {
// Make sure we are not moving machines still pending AcknowledgeMove
if notAcknowledgeMoveReplicas.Has(m.Name) {
// Make sure we are not moving machines still updating in place (this includes also machines still pending AcknowledgeMove).
if _, ok := m.Annotations[clusterv1.UpdateInProgressAnnotation]; ok {
machinesSetMachines = append(machinesSetMachines, m)
continue
}
Expand Down Expand Up @@ -672,31 +667,39 @@ func machineSetControllerMutator(log *fileLogger, ms *clusterv1.MachineSet, scop

// Sort machines of the target MS to ensure consistent reporting during tests.
// Note: this is required because a machine can be moved to a target MachineSet that has been already reconciled before the source MachineSet (it won't sort machine by itself until the next reconcile).
sortMachineSetMachines(scope.machineSetMachines[targetMS.Name])
sortMachineSetMachinesByName(scope.machineSetMachines[targetMS.Name])
}
}
}

if machinesToDeleteOrMove > 0 {
machinesToDelete := machinesToDeleteOrMove
machinesDeleted := []string{}
machinesSetMachines := []*clusterv1.Machine{}
for i, m := range scope.machineSetMachines[ms.Name] {
// Prevent deletion of machines not yet acknowledged after a move operation.
// Note: as soon as an in-place upgrade is started, CAPI Should always try to complete it
// before taking further actions on the same machine.
if notAcknowledgeMoveReplicas.Has(m.Name) {
machinesSetMachines = append(machinesSetMachines, m)
continue
}
Comment on lines -685 to -691
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This if has been dropped because it is unnecessary (probably a leftover of a previous iteration).

if int32(len(machinesDeleted)) >= machinesToDelete {
machinesSetMachines = append(machinesSetMachines, scope.machineSetMachines[ms.Name][i:]...)
break
} else {
machinesToDelete := machinesToDeleteOrMove
machinesDeleted := []string{}

// Note: in the test code exceeding machines are deleted in predictable order, so it is easier to write test case and validate rollout sequences.
// e.g. if a ms has m1,m2,m3 created in this order, m1 will be deleted first, then m2 and finally m3.
// note: In case the system has to delete some replicas, and those replicas are still updating in place, they get deleted first.
//
// This prevents the system from performing unnecessary in-place updates, e.g.:
// - In place rollout of MD with 3 Replicas, maxSurge 1, MaxUnavailable 0
// - First create m4 to create a buffer for doing in place
// - Move old machines (m1, m2, m3)
// - Resulting new MS at this point has 4 replicas m1, m2, m3 (updating in place) and (m4).
// - The system scales down MS, and the system does this getting rid of m3 - the last replica that started in place.
machinesSetMachinesSortedByDeletePriority := sortMachineSetMachinesByDeletionPriorityAndName(scope.machineSetMachines[ms.Name])
machinesSetMachines := []*clusterv1.Machine{}
for i, m := range machinesSetMachinesSortedByDeletePriority {
if int32(len(machinesDeleted)) >= machinesToDelete {
machinesSetMachines = append(machinesSetMachines, machinesSetMachinesSortedByDeletePriority[i:]...)
break
}
machinesDeleted = append(machinesDeleted, m.Name)
}
machinesDeleted = append(machinesDeleted, m.Name)
scope.machineSetMachines[ms.Name] = machinesSetMachines

// Sort machines of the target MS to ensure consistent reporting during tests.
// Note: this is required specifically by the test code because a machine can be moved to a target MachineSet that has been already
// reconciled before the source MachineSet (it won't sort machine by itself until the next reconcile), and test machinery assumes machine are sorted.
sortMachineSetMachinesByName(scope.machineSetMachines[ms.Name])
log.Logf("[MS controller] - %s scale down to %d/%[2]d replicas (%s deleted)", ms.Name, ptr.Deref(ms.Spec.Replicas, 0), strings.Join(machinesDeleted, ","))
}
scope.machineSetMachines[ms.Name] = machinesSetMachines
log.Logf("[MS controller] - %s scale down to %d/%[2]d replicas (%s deleted)", ms.Name, ptr.Deref(ms.Spec.Replicas, 0), strings.Join(machinesDeleted, ","))
}

// Update counters
Expand Down Expand Up @@ -1041,12 +1044,41 @@ func (l *fileLogger) WriteLogAndCompareWithGoldenFile() (string, string, error)
return current, golden, nil
}

func sortMachineSetMachines(machines []*clusterv1.Machine) {
func sortMachineSetMachinesByName(machines []*clusterv1.Machine) {
sort.Slice(machines, func(i, j int) bool {
iIndex, _ := strconv.Atoi(strings.TrimPrefix(machines[i].Name, "m"))
jiIndex, _ := strconv.Atoi(strings.TrimPrefix(machines[j].Name, "m"))
return iIndex < jiIndex
iNameIndex, _ := strconv.Atoi(strings.TrimPrefix(machines[i].Name, "m"))
jNameIndex, _ := strconv.Atoi(strings.TrimPrefix(machines[j].Name, "m"))
return iNameIndex < jNameIndex
})
}

// sortMachineSetMachinesByDeletionPriorityAndName returns a sorted copy of machines,
// ordered by deletion priority: machines still updating in place come first, so that
// scale downs get rid of them before touching machines already at the desired state
// (avoiding unnecessary in-place upgrades).
// Tie-breaking by name index ("m<index>"):
//   - both machines updating in place: the highest index is deleted first
//     (e.g. between m3 and m4, pick m4, aka the last machine being moved).
//   - neither machine updating in place: the lowest index is deleted first
//     (e.g. between m3 and m4, pick m3).
func sortMachineSetMachinesByDeletionPriorityAndName(machines []*clusterv1.Machine) []*clusterv1.Machine {
	sorted := slices.Clone(machines)

	// deletePriority returns a lower value for machines that must be deleted earlier.
	deletePriority := func(m *clusterv1.Machine) int {
		if _, updatingInPlace := m.Annotations[clusterv1.UpdateInProgressAnnotation]; updatingInPlace {
			return 1
		}
		return 100
	}
	// nameIndex extracts the numeric suffix from a "m<index>" machine name.
	nameIndex := func(m *clusterv1.Machine) int {
		idx, _ := strconv.Atoi(strings.TrimPrefix(m.Name, "m"))
		return idx
	}

	sort.Slice(sorted, func(i, j int) bool {
		iPriority, jPriority := deletePriority(sorted[i]), deletePriority(sorted[j])
		if iPriority != jPriority {
			return iPriority < jPriority
		}
		iIdx, jIdx := nameIndex(sorted[i]), nameIndex(sorted[j])
		if iPriority == 1 {
			// Among machines updating in place, delete the most recently moved first.
			return iIdx > jIdx
		}
		return iIdx < jIdx
	})
	return sorted
}

// default task order ensure the controllers are run in a consistent and predictable way: md, ms1, ms2 and so on.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,19 @@
- ms2, 3/3 replicas (m1,m2,m3 <= ms1)
[Toleration] tolerate maxSurge breach
[MS controller] Iteration 2, Reconcile ms1, 9/2 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12 => ms2)
[MS controller] - Move capped to 3 replicas to avoid unnecessary in-place upgrades
[MS controller] - ms1 scale down to 6/2 replicas (m4,m5,m6 moved to ms2)
[MS controller] - ms1 scale down to 2/2 replicas (m7,m8,m9,m10 deleted)
[MS controller] - ms1 scale down to 2/2 replicas (m4,m5,m6,m7,m8,m9,m10 moved to ms2)
[MS controller] Iteration 2, Reconcile ms1, 2/2 replicas (m11,m12 => ms2)
[MD controller] Iteration 3, Reconcile md
[MD controller] - Input to rollout planner
md, 12/6 replicas
- ms1, 2/2 replicas (m11,m12 => ms2)
- ms2, 3/3 replicas (m1,m2,m3,m4🟠🟡,m5🟠🟡,m6🟠🟡 <= ms1)
- ms2, 3/3 replicas (m1,m2,m3,m4🟠🟡,m5🟠🟡,m6🟠🟡,m7🟠🟡,m8🟠🟡,m9🟠🟡,m10🟠🟡 <= ms1)
[MD controller] - Result of rollout planner
md, 5/6 replicas
- ms1, 2/2 replicas (m11,m12 => ms2)
- ms2, 3/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡 <= ms1)
[MS controller] Iteration 3, Reconcile ms2, 3/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡 <= ms1)
- ms2, 3/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡,m7🟡,m8🟡,m9🟡,m10🟡 <= ms1)
[MS controller] Iteration 3, Reconcile ms2, 3/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡,m7🟡,m8🟡,m9🟡,m10🟡 <= ms1)
[MS controller] - ms2 scale down to 6/6 replicas (m10,m9,m8,m7 deleted)
[MD controller] Iteration 4, Reconcile md
[MD controller] - Input to rollout planner
md, 5/6 replicas
Expand Down Expand Up @@ -75,12 +74,24 @@
- ms1, 2/0 replicas (m11,m12 => ms2)
- ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6 <= ms1)
[MS controller] Iteration 8, Reconcile ms1, 2/0 replicas (m11,m12 => ms2)
[MS controller] - Move capped to 0 replicas to avoid unnecessary in-place upgrades
[MS controller] - ms1 scale down to 2/0 replicas ( moved to ms2)
[MS controller] - ms1 scale down to 0/0 replicas (m11,m12 deleted)
[MS controller] Iteration 9, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6 <= ms1)
[MS controller] - ms1 scale down to 0/0 replicas (m11,m12 moved to ms2)
[MS controller] Iteration 9, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6,m11🟠🟡,m12🟠🟡 <= ms1)
[MS controller] - Replicas m11,m12 moved from an old MachineSet still pending acknowledge from machine deployment md
[MS controller] Iteration 9, Reconcile ms1, 0/0 replicas ( => ms2)
[MD controller] Iteration 9, Reconcile md
[MD controller] - Input to rollout planner
md, 8/6 replicas
- ms1, 0/0 replicas ( => ms2)
- ms2, 8/6 replicas (m1,m2,m3,m4,m5,m6,m11🟠🟡,m12🟠🟡 <= ms1)
[MD controller] - Result of rollout planner
md, 8/6 replicas
- ms1, 0/0 replicas ( => ms2)
- ms2, 8/6 replicas (m1,m2,m3,m4,m5,m6,m11🟡,m12🟡 <= ms1)
[MS controller] Iteration 10, Reconcile ms2, 8/6 replicas (m1,m2,m3,m4,m5,m6,m11🟡,m12🟡 <= ms1)
[MS controller] - ms2 scale down to 6/6 replicas (m12,m11 deleted)
[MS controller] Iteration 10, Reconcile ms1, 0/0 replicas ( => ms2)
[MS controller] Iteration 11, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6 <= ms1)
[MD controller] Iteration 11, Reconcile md
[MD controller] - Input to rollout planner
md, 8/6 replicas
- ms1, 0/0 replicas ( => ms2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,25 @@
- ms2, 3/3 replicas (m1,m2,m3 <= ms1)
[Toleration] tolerate maxSurge breach
[MS controller] Iteration 1, Reconcile ms1, 9/2 replicas (m4,m5,m6,m7,m8,m9,m10,m11,m12 => ms2)
[MS controller] - Move capped to 3 replicas to avoid unnecessary in-place upgrades
[MS controller] - ms1 scale down to 6/2 replicas (m4,m5,m6 moved to ms2)
[MS controller] - ms1 scale down to 2/2 replicas (m7,m8,m9,m10 deleted)
[MS controller] Iteration 1, Reconcile ms2, 3/3 replicas (m1,m2,m3,m4🟠🟡,m5🟠🟡,m6🟠🟡 <= ms1)
[MS controller] - Replicas m4,m5,m6 moved from an old MachineSet still pending acknowledge from machine deployment md
[MS controller] - ms1 scale down to 2/2 replicas (m4,m5,m6,m7,m8,m9,m10 moved to ms2)
[MS controller] Iteration 1, Reconcile ms2, 3/3 replicas (m1,m2,m3,m4🟠🟡,m5🟠🟡,m6🟠🟡,m7🟠🟡,m8🟠🟡,m9🟠🟡,m10🟠🟡 <= ms1)
[MS controller] - Replicas m10,m4,m5,m6,m7,m8,m9 moved from an old MachineSet still pending acknowledge from machine deployment md
[MD controller] Iteration 2, Reconcile md
[MD controller] - Input to rollout planner
md, 12/6 replicas
- ms1, 2/2 replicas (m11,m12 => ms2)
- ms2, 6/3 replicas (m1,m2,m3,m4🟠🟡,m5🟠🟡,m6🟠🟡 <= ms1)
- ms2, 10/3 replicas (m1,m2,m3,m4🟠🟡,m5🟠🟡,m6🟠🟡,m7🟠🟡,m8🟠🟡,m9🟠🟡,m10🟠🟡 <= ms1)
[MD controller] - Result of rollout planner
md, 8/6 replicas
md, 12/6 replicas
- ms1, 2/2 replicas (m11,m12 => ms2)
- ms2, 6/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡 <= ms1)
- ms2, 10/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡,m7🟡,m8🟡,m9🟡,m10🟡 <= ms1)
[Toleration] tolerate maxSurge breach
[MS controller] Iteration 2, Reconcile ms1, 2/2 replicas (m11,m12 => ms2)
[MS controller] Iteration 2, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡 <= ms1)
[MS controller] Iteration 2, Reconcile ms2, 10/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡,m7🟡,m8🟡,m9🟡,m10🟡 <= ms1)
[MS controller] - ms2 scale down to 6/6 replicas (m10,m9,m8,m7 deleted)
[MD controller] Iteration 3, Reconcile md
[MD controller] - Input to rollout planner
md, 8/6 replicas
md, 12/6 replicas
- ms1, 2/2 replicas (m11,m12 => ms2)
- ms2, 6/6 replicas (m1,m2,m3,m4🟡,m5🟡,m6🟡 <= ms1)
[MD controller] - Result of rollout planner
Expand All @@ -54,11 +54,22 @@
- ms1, 2/0 replicas (m11,m12 => ms2)
- ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6 <= ms1)
[MS controller] Iteration 4, Reconcile ms1, 2/0 replicas (m11,m12 => ms2)
[MS controller] - Move capped to 0 replicas to avoid unnecessary in-place upgrades
[MS controller] - ms1 scale down to 2/0 replicas ( moved to ms2)
[MS controller] - ms1 scale down to 0/0 replicas (m11,m12 deleted)
[MS controller] Iteration 4, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6 <= ms1)
[MS controller] - ms1 scale down to 0/0 replicas (m11,m12 moved to ms2)
[MS controller] Iteration 4, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6,m11🟠🟡,m12🟠🟡 <= ms1)
[MS controller] - Replicas m11,m12 moved from an old MachineSet still pending acknowledge from machine deployment md
[MD controller] Iteration 5, Reconcile md
[MD controller] - Input to rollout planner
md, 8/6 replicas
- ms1, 0/0 replicas ( => ms2)
- ms2, 8/6 replicas (m1,m2,m3,m4,m5,m6,m11🟠🟡,m12🟠🟡 <= ms1)
[MD controller] - Result of rollout planner
md, 8/6 replicas
- ms1, 0/0 replicas ( => ms2)
- ms2, 8/6 replicas (m1,m2,m3,m4,m5,m6,m11🟡,m12🟡 <= ms1)
[MS controller] Iteration 5, Reconcile ms1, 0/0 replicas ( => ms2)
[MS controller] Iteration 5, Reconcile ms2, 8/6 replicas (m1,m2,m3,m4,m5,m6,m11🟡,m12🟡 <= ms1)
[MS controller] - ms2 scale down to 6/6 replicas (m12,m11 deleted)
[MD controller] Iteration 6, Reconcile md
[MD controller] - Input to rollout planner
md, 8/6 replicas
- ms1, 0/0 replicas ( => ms2)
Expand All @@ -67,8 +78,8 @@
md, 6/6 replicas
- ms1, 0/0 replicas ( => ms2)
- ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6 <= ms1)
[MS controller] Iteration 5, Reconcile ms1, 0/0 replicas ( => ms2)
[MS controller] Iteration 5, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6 <= ms1)
[MS controller] Iteration 6, Reconcile ms1, 0/0 replicas ( => ms2)
[MS controller] Iteration 6, Reconcile ms2, 6/6 replicas (m1,m2,m3,m4,m5,m6 <= ms1)
[Test] Final state
md, 6/6 replicas
- ms1, 0/0 replicas ( => ms2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,18 +128,28 @@
[MS controller] Iteration 16, Reconcile ms2, 3/3 replicas (m1,m2,m4 <= ms1)
[MS controller] Iteration 16, Reconcile ms2, 3/3 replicas (m1,m2,m4 <= ms1)
[MS controller] Iteration 16, Reconcile ms1, 1/0 replicas (m3 => ms2)
[MS controller] - Move capped to 0 replicas to avoid unnecessary in-place upgrades
[MS controller] - ms1 scale down to 1/0 replicas ( moved to ms2)
[MS controller] - ms1 scale down to 0/0 replicas (m3 deleted)
[MS controller] - ms1 scale down to 0/0 replicas (m3 moved to ms2)
[MD controller] Iteration 16, Reconcile md
[MD controller] - Input to rollout planner
md, 4/3 replicas
- ms1, 0/0 replicas ( => ms2)
- ms2, 3/3 replicas (m1,m2,m4 <= ms1)
- ms2, 3/3 replicas (m1,m2,m3🟠🟡,m4 <= ms1)
[MD controller] - Result of rollout planner
md, 3/3 replicas
- ms1, 0/0 replicas ( => ms2)
- ms2, 3/3 replicas (m1,m2,m4 <= ms1)
- ms2, 3/3 replicas (m1,m2,m3🟡,m4 <= ms1)
[MD controller] Iteration 17, Reconcile md
[MD controller] - Input to rollout planner
md, 3/3 replicas
- ms1, 0/0 replicas ( => ms2)
- ms2, 3/3 replicas (m1,m2,m3🟡,m4 <= ms1)
[MD controller] - Result of rollout planner
md, 3/3 replicas
- ms1, 0/0 replicas ( => ms2)
- ms2, 3/3 replicas (m1,m2,m3🟡,m4 <= ms1)
[MS controller] Iteration 17, Reconcile ms2, 3/3 replicas (m1,m2,m3🟡,m4 <= ms1)
[MS controller] - ms2 scale down to 3/3 replicas (m3 deleted)
[MS controller] Iteration 17, Reconcile ms1, 0/0 replicas ( => ms2)
[Test] Final state
md, 3/3 replicas
- ms1, 0/0 replicas ( => ms2)
Expand Down
Loading