diff --git a/controlplane/kubeadm/internal/controllers/controller.go b/controlplane/kubeadm/internal/controllers/controller.go index f48e64045c67..ec3d548f70f0 100644 --- a/controlplane/kubeadm/internal/controllers/controller.go +++ b/controlplane/kubeadm/internal/controllers/controller.go @@ -806,12 +806,6 @@ func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(_ context.C } // syncMachines updates Machines, InfrastructureMachines and KubeadmConfigs to propagate in-place mutable fields from KCP. -// Note: It also cleans up managed fields of all Machines so that Machines that were -// created/patched before (< v1.4.0) the controller adopted Server-Side-Apply (SSA) can also work with SSA. -// Note: For InfrastructureMachines and KubeadmConfigs it also drops ownership of "metadata.labels" and -// "metadata.annotations" from "manager" so that "capi-kubeadmcontrolplane" can own these fields and can work with SSA. -// Otherwise, fields would be co-owned by our "old" "manager" and "capi-kubeadmcontrolplane" and then we would not be -// able to e.g. drop labels and annotations. func (r *KubeadmControlPlaneReconciler) syncMachines(ctx context.Context, controlPlane *internal.ControlPlane) error { patchHelpers := map[string]*patch.Helper{} for machineName := range controlPlane.Machines { diff --git a/controlplane/kubeadm/internal/controllers/inplace_trigger.go b/controlplane/kubeadm/internal/controllers/inplace_trigger.go index 5a0b1c8ec5b0..7e49bceb5863 100644 --- a/controlplane/kubeadm/internal/controllers/inplace_trigger.go +++ b/controlplane/kubeadm/internal/controllers/inplace_trigger.go @@ -21,6 +21,7 @@ import ( "time" "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" @@ -100,7 +101,8 @@ func (r *KubeadmControlPlaneReconciler) triggerInPlaceUpdate(ctx context.Context // of an in-place update here, e.g. for the case where the InfraMachineTemplate was rotated. clusterv1.TemplateClonedFromNameAnnotation: desiredInfraMachine.GetAnnotations()[clusterv1.TemplateClonedFromNameAnnotation], clusterv1.TemplateClonedFromGroupKindAnnotation: desiredInfraMachine.GetAnnotations()[clusterv1.TemplateClonedFromGroupKindAnnotation], - clusterv1.UpdateInProgressAnnotation: "", + // Machine controller waits for this annotation to exist on Machine and related objects before starting the in-place update. + clusterv1.UpdateInProgressAnnotation: "", }) if err := ssa.Patch(ctx, r.Client, kcpManagerName, desiredInfraMachine); err != nil { return errors.Wrapf(err, "failed to complete triggering in-place update for Machine %s", klog.KObj(machine)) @@ -109,6 +111,7 @@ func (r *KubeadmControlPlaneReconciler) triggerInPlaceUpdate(ctx context.Context // Write KubeadmConfig without the labels & annotations that are written continuously by updateLabelsAndAnnotations. desiredKubeadmConfig.Labels = nil desiredKubeadmConfig.Annotations = map[string]string{ + // Machine controller waits for this annotation to exist on Machine and related objects before starting the in-place update. 
clusterv1.UpdateInProgressAnnotation: "", } if err := ssa.Patch(ctx, r.Client, kcpManagerName, desiredKubeadmConfig); err != nil { @@ -134,6 +137,7 @@ func (r *KubeadmControlPlaneReconciler) triggerInPlaceUpdate(ctx context.Context } log.Info("Completed triggering in-place update", "Machine", klog.KObj(machine)) + r.recorder.Event(machine, corev1.EventTypeNormal, "SuccessfulStartInPlaceUpdate", "Machine starting in-place update") // Wait until the cache observed the Machine with PendingHooksAnnotation to ensure subsequent reconciles // will observe it as well and won't repeatedly call triggerInPlaceUpdate. diff --git a/controlplane/kubeadm/internal/controllers/inplace_trigger_test.go b/controlplane/kubeadm/internal/controllers/inplace_trigger_test.go index ede49afd21d3..b6e0da22522c 100644 --- a/controlplane/kubeadm/internal/controllers/inplace_trigger_test.go +++ b/controlplane/kubeadm/internal/controllers/inplace_trigger_test.go @@ -24,6 +24,7 @@ import ( . "github.com/onsi/gomega" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/client-go/tools/record" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" @@ -278,7 +279,8 @@ func Test_triggerInPlaceUpdate(t *testing.T) { } r := KubeadmControlPlaneReconciler{ - Client: env.Client, + Client: env.Client, + recorder: record.NewFakeRecorder(32), } err := r.triggerInPlaceUpdate(ctx, currentMachineForPatch, upToDateResult) diff --git a/controlplane/kubeadm/internal/desiredstate/desired_state.go b/controlplane/kubeadm/internal/desiredstate/desired_state.go index b58c273a084b..072645628f13 100644 --- a/controlplane/kubeadm/internal/desiredstate/desired_state.go +++ b/controlplane/kubeadm/internal/desiredstate/desired_state.go @@ -225,7 +225,7 @@ func ComputeDesiredKubeadmConfig(kcp *controlplanev1.KubeadmControlPlane, cluste } DefaultFeatureGates(spec, parsedVersion) - return &bootstrapv1.KubeadmConfig{ + kubeadmConfig := &bootstrapv1.KubeadmConfig{ ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: kcp.Namespace, @@ -234,7 +234,11 @@ func ComputeDesiredKubeadmConfig(kcp *controlplanev1.KubeadmControlPlane, cluste OwnerReferences: ownerReferences, }, Spec: *spec, - }, nil + } + if existingKubeadmConfig != nil { + kubeadmConfig.SetUID(existingKubeadmConfig.GetUID()) + } + return kubeadmConfig, nil } // ComputeDesiredInfraMachine computes the desired InfraMachine. 
@@ -279,6 +283,9 @@ func ComputeDesiredInfraMachine(ctx context.Context, c client.Client, kcp *contr if err != nil { return nil, errors.Wrap(err, "failed to compute desired InfraMachine") } + if existingInfraMachine != nil { + infraMachine.SetUID(existingInfraMachine.GetUID()) + } return infraMachine, nil } diff --git a/internal/controllers/machinedeployment/machinedeployment_rollout_planner_test.go b/internal/controllers/machinedeployment/machinedeployment_rollout_planner_test.go index 874d4bcd6e2f..8717b4ab7705 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rollout_planner_test.go +++ b/internal/controllers/machinedeployment/machinedeployment_rollout_planner_test.go @@ -39,7 +39,9 @@ import ( "k8s.io/utils/ptr" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + runtimehooksv1 "sigs.k8s.io/cluster-api/api/runtime/hooks/v1alpha1" "sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil" + "sigs.k8s.io/cluster-api/internal/hooks" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/conversion" ) @@ -499,25 +501,15 @@ func machineControllerMutator(log *fileLogger, m *clusterv1.Machine, scope *roll // machineSetControllerMutator fakes a small part of the MachineSet controller, just what is required for the rollout to progress. func machineSetControllerMutator(log *fileLogger, ms *clusterv1.MachineSet, scope *rolloutScope) error { - // Update counters - // Note: this should not be implemented in production code - ms.Status.Replicas = ptr.To(int32(len(scope.machineSetMachines[ms.Name]))) - - // TODO(in-place): when implementing in-place changes in the MachineSetController code make sure to: - // - detect if there are replicas still pending AcknowledgeMove first, including also handling cleanup of the pendingAcknowledgeMoveAnnotationName on machines - // - when deleting or moving - // - move or delete are mutually exclusive - // - do not move machines pending a move Acknowledge, with an in-place upgrade in progress, deleted or marked for deletion (TBD if to be remediated) - // - when deleting, machines updating in place should have higher priority than other machines (first get rid of not at the desired state/try to avoid unnecessary work; also those machines are unavailable). - - // Sort machines to ensure stable results of move/delete operations during tests. - // TODO(in-place): this should not be implemented in production code for the MachineSet controller - sortMachineSetMachinesByName(scope.machineSetMachines[ms.Name]) - - // Removing updatingInPlaceAnnotation from machines after pendingAcknowledgeMove is gone in a previous reconcile (so inPlaceUpdating lasts one reconcile more) - // NOTE: This is a testing workaround to simulate inPlaceUpdating being completed; it is implemented in the fake MachineSet controller - // because while testing RolloutUpdate sequences with in-place updates we are not currently using a fake Machine controller. - // TODO(in-place): this should not be implemented in production code for the MachineSet controller nor in the production code for the Machine controller + // The prod code for the MachineSet controller performs in order triggerInPlaceUpdate and then syncReplicas and then updateStatus. + // This func mimics the same code structure, with the addition of the following operation that is implemented here for convenience. + + // In order to simulate an in-place update being completed, remove the UpdateInProgressAnnotation from machines after + // pendingAcknowledgeMove is gone in a previous reconcile. 
+ // Note: ideally this should be implemented in the fake Machine controller, but the current implementation is + // considered an acceptable trade-off because it provides a signal that the in-place update completed, without + // the need to add the complexity of the machine controller reconcile to the sequence tests/to the golden files + // for the RollingUpdate strategy. replicasEndingInPlaceUpdate := sets.Set[string]{} for _, m := range scope.machineSetMachines[ms.Name] { if _, ok := m.Annotations[clusterv1.PendingAcknowledgeMoveAnnotation]; ok { @@ -532,178 +524,258 @@ func machineSetControllerMutator(log *fileLogger, ms *clusterv1.MachineSet, scop log.Logf("[MS controller] - Replicas %s completed in place update", sortAndJoin(replicasEndingInPlaceUpdate.UnsortedList())) } - // If the MachineSet is accepting replicas from other MS (this is the newMS controlled by a MD), - // detect if there are replicas still pending AcknowledgedMove. - // Replicas not yet acknowledged will have a special treatment when scaling up/down the MS, because they are not included in the replica count; - // instead, for already acknowledged replicas, cleanup the PendingAcknowledgeMoveAnnotation. - acknowledgedMoveReplicas := sets.Set[string]{} - if replicaNames, ok := ms.Annotations[clusterv1.AcknowledgedMoveAnnotation]; ok && replicaNames != "" { - acknowledgedMoveReplicas.Insert(strings.Split(replicaNames, ",")...) - } - notAcknowledgeMoveReplicas := sets.Set[string]{} - if sourceMSs, ok := ms.Annotations[clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation]; ok && sourceMSs != "" { - for _, m := range scope.machineSetMachines[ms.Name] { - if _, ok := m.Annotations[clusterv1.PendingAcknowledgeMoveAnnotation]; !ok { - continue - } + // Starting from here, the code must mirror the implementation of the Reconcile func in the MachineSet controller to ensure the reliability of the tests. + // NOTE: For consistency with the prod code, triggerInPlaceUpdate and then syncReplicas should not rely on ms.Status, + // because ms.Status is computed only at a later stage by defer updateStatus. + // (the code should rely on the list of machines instead). + + defer func() { + machineSetControllerMutatorUpdateStatus(ms, scope) + }() + + machineSetControllerMutatorTriggerInPlaceUpdate(ms, scope) + return machineSetControllerMutatorSyncReplicas(log, ms, scope) +} + +func machineSetControllerMutatorTriggerInPlaceUpdate(ms *clusterv1.MachineSet, scope *rolloutScope) { + // Code below this line is a subset of the code from MachineSet controller's triggerInPlaceUpdate func, e.g. + // it does not complete the move operation (what is implemented in moveMachine is enough for this test), + // nor does it set the pendingHook annotation. - // If machine has been acknowledged by the MachineDeployment, cleanup pending AcknowledgeMove annotation from the machine - if acknowledgedMoveReplicas.Has(m.Name) { + // If the existing machine is pending acknowledge from the MD controller after a move operation, + // wait until it is possible to drop the PendingAcknowledgeMove annotation. + for _, m := range scope.machineSetMachines[ms.Name] { + if _, ok := m.Annotations[clusterv1.PendingAcknowledgeMoveAnnotation]; ok { + // Check if this MachineSet is still accepting machines moved from other MachineSets. + if sourceMSs, ok := ms.Annotations[clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation]; ok && sourceMSs != "" { + // Get the list of machines acknowledged by the MD controller. 
+ acknowledgedMoveReplicas := sets.Set[string]{} + if replicaNames, ok := ms.Annotations[clusterv1.AcknowledgedMoveAnnotation]; ok && replicaNames != "" { + acknowledgedMoveReplicas.Insert(strings.Split(replicaNames, ",")...) + } + + // If the current machine is not yet in the list, it is not possible to trigger in-place yet. + if !acknowledgedMoveReplicas.Has(m.Name) { + continue + } + + // If the current machine is in the list, drop the annotation. + delete(m.Annotations, clusterv1.PendingAcknowledgeMoveAnnotation) + } else { + // If this MachineSet is no longer accepting machines from other MS (e.g. because of MD spec changes), + // then drop the PendingAcknowledgeMove annotation; this machine will be treated as any other machine and either + // deleted or moved to another MS after completing the in-place upgrade. delete(m.Annotations, clusterv1.PendingAcknowledgeMoveAnnotation) - continue } - - // Otherwise keep track of replicas not yet acknowledged. - notAcknowledgeMoveReplicas.Insert(m.Name) - } - } else { - // Otherwise this MachineSet is not accepting replicas from other MS (this is an oldMS controller by a MD). - // Drop PendingAcknowledgeMoveAnnotation from controlled Machines. - // Note: if there are machines recently moved but not yet accepted, those machines will be managed - // as any other machine and either moved to the new MS (after completing the in-place upgrade) or deleted. - for _, m := range scope.machineSetMachines[ms.Name] { - delete(m.Annotations, clusterv1.PendingAcknowledgeMoveAnnotation) } } - if notAcknowledgeMoveReplicas.Len() > 0 { - log.Logf("[MS controller] - Replicas %s moved from an old MachineSet still pending acknowledge from machine deployment %s", sortAndJoin(notAcknowledgeMoveReplicas.UnsortedList()), klog.KObj(scope.machineDeployment)) - } +} - // if too few machines, create missing machine. - // new machines are created with a predictable name, so it is easier to write test case and validate rollout sequences. - // e.g. if the cluster is initialized with m1, m2, m3, new machines will be m4, m5, m6 - if value, ok := ms.Annotations[clusterv1.DisableMachineCreateAnnotation]; !ok || value != "true" { - machinesToAdd := ptr.Deref(ms.Spec.Replicas, 0) - ptr.Deref(ms.Status.Replicas, 0) - if machinesToAdd > 0 { - machinesAdded := []string{} - for range machinesToAdd { - machineName := fmt.Sprintf("m%d", scope.GetNextMachineUID()) - scope.machineSetMachines[ms.Name] = append(scope.machineSetMachines[ms.Name], - &clusterv1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: machineName, - OwnerReferences: []metav1.OwnerReference{ - { - APIVersion: clusterv1.GroupVersion.String(), - Kind: "MachineSet", - Name: ms.Name, - Controller: ptr.To(true), - }, - }, - }, - Spec: *ms.Spec.Template.Spec.DeepCopy(), - }, - ) - machinesAdded = append(machinesAdded, machineName) +func machineSetControllerMutatorSyncReplicas(log *fileLogger, ms *clusterv1.MachineSet, scope *rolloutScope) error { + // Code below this line is a subset of the code from MachineSet controller's "syncReplicas" func. + + diff := len(scope.machineSetMachines[ms.Name]) - int(ptr.Deref(ms.Spec.Replicas, 0)) + switch { + case diff < 0: + // If there are not enough Machines, create missing Machines unless Machine creation is disabled. 
+ machinesToAdd := -diff + if ms.Annotations != nil { + if value, ok := ms.Annotations[clusterv1.DisableMachineCreateAnnotation]; ok && value == "true" { + return nil + } + } + machineSetControllerMutatorCreateMachines(log, ms, scope, machinesToAdd) + + case diff > 0: + // if too many replicas, delete or move exceeding machines. + + // If the MachineSet is accepting replicas from other MachineSets (and thus this is the newMS controlled by a MD), + // detect if there are replicas still pending AcknowledgedMove. + // Note: replicas still pending AcknowledgeMove should not be counted when computing the numbers of machines to delete, because those machines are not included in ms.Spec.Replicas yet. + // Without this check, the following logic would try to align the number of replicas to "an incomplete" ms.Spec.Replicas and as a consequence wrongly delete replicas that should be preserved. + notAcknowledgeMoveReplicas := sets.Set[string]{} + if sourceMSs, ok := ms.Annotations[clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation]; ok && sourceMSs != "" { + for _, m := range scope.machineSetMachines[ms.Name] { + if _, ok := m.Annotations[clusterv1.PendingAcknowledgeMoveAnnotation]; !ok { + continue + } + notAcknowledgeMoveReplicas.Insert(m.Name) } + } + if notAcknowledgeMoveReplicas.Len() > 0 { + log.Logf("[MS controller] - Replicas %s moved from an old MachineSet still pending acknowledge from machine deployment %s", sortAndJoin(notAcknowledgeMoveReplicas.UnsortedList()), klog.KObj(scope.machineDeployment)) + } - log.Logf("[MS controller] - %s scale up to %d/%[2]d replicas (%s created)", ms.Name, ptr.Deref(ms.Spec.Replicas, 0), strings.Join(machinesAdded, ",")) + machinesToDeleteOrMove := int32(len(scope.machineSetMachines[ms.Name])-notAcknowledgeMoveReplicas.Len()) - ptr.Deref(ms.Spec.Replicas, 0) + if machinesToDeleteOrMove == 0 { + return nil } - } - // if too many replicas, delete or move exceeding machines. - // exceeding machines are deleted or moved in predictable order, so it is easier to write test case and validate rollout sequences. - // e.g. if a ms has m1,m2,m3 created in this order, m1 will be deleted first, then m2 and finally m3. - // Note: replicas still pending AcknowledgeMove should not be counted when computing the numbers of machines to delete, because those machines are not included in ms.Spec.Replicas yet. - // Without this check, the following logic would try to align the number of replicas to "an incomplete" ms.Spec.Replicas thus wrongly deleting replicas that should be preserved. - machinesToDeleteOrMove := max(ptr.Deref(ms.Status.Replicas, 0)-int32(notAcknowledgeMoveReplicas.Len())-ptr.Deref(ms.Spec.Replicas, 0), 0) - if machinesToDeleteOrMove > 0 { + // Move machines to the target MachineSet if the current MachineSet is instructed to do so. if targetMSName, ok := ms.Annotations[clusterv1.MachineSetMoveMachinesToMachineSetAnnotation]; ok && targetMSName != "" { - { - var targetMS *clusterv1.MachineSet - for _, ms2 := range scope.machineSets { - if ms2.Name == targetMSName { - targetMS = ms2 - break - } - } - if targetMS == nil { - return errors.Errorf("[MS controller] - PANIC! %s is set to send replicas to %s, which does not exists", ms.Name, targetMSName) - } + // Note: The number of machines actually moved could be less than expected e.g. because some machine still updating in-place from a previous move. 
+ return machineSetControllerMutatorMoveMachines(log, ms, scope, targetMSName, machinesToDeleteOrMove) + } - validSourceMSs := targetMS.Annotations[clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation] - sourcesSet := sets.Set[string]{} - sourcesSet.Insert(strings.Split(validSourceMSs, ",")...) - if !sourcesSet.Has(ms.Name) { - return errors.Errorf("[MS controller] - PANIC! %s is set to send replicas to %s, but %[2]s only accepts machines from %s", ms.Name, targetMS.Name, validSourceMSs) - } + // Otherwise the current MachineSet is not instructed to move machines to another MachineSet, + // then delete all the exceeding machines. + machineSetControllerMutatorDeleteMachines(log, ms, scope, machinesToDeleteOrMove) + return nil + } + return nil +} - // Always move all the machine that can be moved. - // In case the target machine set will end up with more machines than its target replica number, it will take care of this. - machinesToMove := machinesToDeleteOrMove - machinesMoved := []string{} - machinesSetMachines := []*clusterv1.Machine{} - for i, m := range scope.machineSetMachines[ms.Name] { - // Make sure we are not moving machines still updating in place (this includes also machines still pending AcknowledgeMove). - if _, ok := m.Annotations[clusterv1.UpdateInProgressAnnotation]; ok { - machinesSetMachines = append(machinesSetMachines, m) - continue - } - - if int32(len(machinesMoved)) >= machinesToMove { - machinesSetMachines = append(machinesSetMachines, scope.machineSetMachines[ms.Name][i:]...) - break - } - - m := scope.machineSetMachines[ms.Name][i] - if m.Annotations == nil { - m.Annotations = map[string]string{} - } - m.OwnerReferences = []metav1.OwnerReference{ +func machineSetControllerMutatorCreateMachines(log *fileLogger, ms *clusterv1.MachineSet, scope *rolloutScope, machinesToAdd int) { + // Note: this is a simplified version of the code in the createMachines func from the MachineSet controller, e.g. no preflight checks, + // no/lighter logging, no handling for infraMachine & BootstrapConfig, no event generation, no wait for cache up to date. + // Note: In the code below, new machines are created with a predictable name, so it is easier to write test case and validate rollout sequences. + // e.g. if the cluster is initialized with m1, m2, m3, new machines will be m4, m5, m6 + + machinesAdded := []string{} + for range machinesToAdd { + machineName := fmt.Sprintf("m%d", scope.GetNextMachineUID()) + scope.machineSetMachines[ms.Name] = append(scope.machineSetMachines[ms.Name], + &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: machineName, + OwnerReferences: []metav1.OwnerReference{ { APIVersion: clusterv1.GroupVersion.String(), Kind: "MachineSet", - Name: targetMS.Name, + Name: ms.Name, Controller: ptr.To(true), }, - } - m.Annotations[clusterv1.PendingAcknowledgeMoveAnnotation] = "" - m.Annotations[clusterv1.UpdateInProgressAnnotation] = "" - scope.machineSetMachines[targetMS.Name] = append(scope.machineSetMachines[targetMS.Name], m) - machinesMoved = append(machinesMoved, m.Name) - } - scope.machineSetMachines[ms.Name] = machinesSetMachines - log.Logf("[MS controller] - %s scale down to %d/%d replicas (%s moved to %s)", ms.Name, len(scope.machineSetMachines[ms.Name]), ptr.Deref(ms.Spec.Replicas, 0), strings.Join(machinesMoved, ","), targetMS.Name) + }, + }, + Spec: *ms.Spec.Template.Spec.DeepCopy(), + }, + ) + machinesAdded = append(machinesAdded, machineName) + } - // Sort machines of the target MS to ensure consistent reporting during tests. 
- // Note: this is required because a machine can be moved to a target MachineSet that has been already reconciled before the source MachineSet (it won't sort machine by itself until the next reconcile). - sortMachineSetMachinesByName(scope.machineSetMachines[targetMS.Name]) - } - } else { - machinesToDelete := machinesToDeleteOrMove - machinesDeleted := []string{} - - // Note: in the test code exceeding machines are deleted in predictable order, so it is easier to write test case and validate rollout sequences. - // e.g. if a ms has m1,m2,m3 created in this order, m1 will be deleted first, then m2 and finally m3. - // note: In case the system has to delete some replicas, and those replicas are still updating in place, they gets deleted first. - // - // This prevents the system to perform unnecessary in-place updates. e.g. - // - In place rollout of MD with 3 Replicas, maxSurge 1, MaxUnavailable 0 - // - First create m4 to create a buffer for doing in place - // - Move old machines (m1, m2, m3) - // - Resulting new MS at this point has 4 replicas m1, m2, m3 (updating in place) and (m4). - // - The system scales down MS, and the system does this getting rid of m3 - the last replica that started in place. - machinesSetMachinesSortedByDeletePriority := sortMachineSetMachinesByDeletionPriorityAndName(scope.machineSetMachines[ms.Name]) - machinesSetMachines := []*clusterv1.Machine{} - for i, m := range machinesSetMachinesSortedByDeletePriority { - if int32(len(machinesDeleted)) >= machinesToDelete { - machinesSetMachines = append(machinesSetMachines, machinesSetMachinesSortedByDeletePriority[i:]...) - break - } - machinesDeleted = append(machinesDeleted, m.Name) - } - scope.machineSetMachines[ms.Name] = machinesSetMachines + // Sort machines of the target MS to ensure consistent reporting during tests. + sortMachineSetMachinesByName(scope.machineSetMachines[ms.Name]) + + log.Logf("[MS controller] - %s scale up to %d/%[2]d replicas (%s created)", ms.Name, ptr.Deref(ms.Spec.Replicas, 0), strings.Join(machinesAdded, ",")) +} + +func machineSetControllerMutatorMoveMachines(log *fileLogger, ms *clusterv1.MachineSet, scope *rolloutScope, targetMSName string, machinesToMove int32) error { + // Note: this is a simplified version of the code in the moveMachines func from the MachineSet controller, e.g. no pluggable move order, + // no update of machine labels, no/lighter logging. Also please note that for the sake of this test, there is no split between startMove and + // completeMove (what is implemented below is enough to fake the entire move operation). + // Note: in the test code exceeding machines are moved in predictable order, so it is easier to write test case and validate rollout sequences. + // e.g. if a ms has m1,m2,m3 created in this order, m1 will be moved first, then m2 and finally m3. + + var targetMS *clusterv1.MachineSet + for _, ms2 := range scope.machineSets { + if ms2.Name == targetMSName { + targetMS = ms2 + break + } + } + if targetMS == nil { + return errors.Errorf("[MS controller] - PANIC! %s is set to send replicas to %s, which does not exist", ms.Name, targetMSName) + } + + validSourceMSs := targetMS.Annotations[clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation] + sourcesSet := sets.Set[string]{} + sourcesSet.Insert(strings.Split(validSourceMSs, ",")...) + if !sourcesSet.Has(ms.Name) { + return errors.Errorf("[MS controller] - PANIC! 
%s is set to send replicas to %s, but %[2]s only accepts machines from %s", ms.Name, targetMS.Name, validSourceMSs) + } + + // Sort machines to ensure stable results of the move operation during tests. + sortMachineSetMachinesByName(scope.machineSetMachines[ms.Name]) + + // Always move all the machines that can be moved. + // In case the target machine set will end up with more machines than its target replica number, it will take care of this. + machinesMoved := []string{} + machinesSetMachines := []*clusterv1.Machine{} + for i, m := range scope.machineSetMachines[ms.Name] { + // Make sure we are not moving machines that are being deleted. + if !m.DeletionTimestamp.IsZero() { + machinesSetMachines = append(machinesSetMachines, m) + continue + } + + // Make sure we are not moving machines still updating in place (this includes also machines still pending AcknowledgeMove). + if _, ok := m.Annotations[clusterv1.UpdateInProgressAnnotation]; ok || hooks.IsPending(runtimehooksv1.UpdateMachine, m) { + machinesSetMachines = append(machinesSetMachines, m) + continue + } - // Sort machines of the target MS to ensure consistent reporting during tests. - // Note: this is required specifically by the test code because a machine can be moved to a target MachineSet that has been already - // reconciled before the source MachineSet (it won't sort machine by itself until the next reconcile), and test machinery assumes machine are sorted. - sortMachineSetMachinesByName(scope.machineSetMachines[ms.Name]) - log.Logf("[MS controller] - %s scale down to %d/%[2]d replicas (%s deleted)", ms.Name, ptr.Deref(ms.Spec.Replicas, 0), strings.Join(machinesDeleted, ",")) + if int32(len(machinesMoved)) >= machinesToMove { + machinesSetMachines = append(machinesSetMachines, scope.machineSetMachines[ms.Name][i:]...) + break } + + m := scope.machineSetMachines[ms.Name][i] + if m.Annotations == nil { + m.Annotations = map[string]string{} + } + m.OwnerReferences = []metav1.OwnerReference{ + { + APIVersion: clusterv1.GroupVersion.String(), + Kind: "MachineSet", + Name: targetMS.Name, + Controller: ptr.To(true), + }, + } + m.Annotations[clusterv1.PendingAcknowledgeMoveAnnotation] = "" + m.Annotations[clusterv1.UpdateInProgressAnnotation] = "" + scope.machineSetMachines[targetMS.Name] = append(scope.machineSetMachines[targetMS.Name], m) + machinesMoved = append(machinesMoved, m.Name) + } + scope.machineSetMachines[ms.Name] = machinesSetMachines + log.Logf("[MS controller] - %s scale down to %d/%d replicas (%s moved to %s)", ms.Name, len(scope.machineSetMachines[ms.Name]), ptr.Deref(ms.Spec.Replicas, 0), strings.Join(machinesMoved, ","), targetMS.Name) + + // Sort machines of the target MS to ensure consistent reporting during tests. + // Note: It is also required to sort machines for the targetMS because both ms and targetMS lists of machines are changed in this func. + sortMachineSetMachinesByName(scope.machineSetMachines[ms.Name]) + sortMachineSetMachinesByName(scope.machineSetMachines[targetMS.Name]) + return nil +} + +func machineSetControllerMutatorDeleteMachines(log *fileLogger, ms *clusterv1.MachineSet, scope *rolloutScope, machinesToDelete int32) { + // This is a simplified version of the code in the deleteMachines func from the MachineSet controller, e.g. + // the test code does not consider the criteria defined in ms.Spec.Deletion.Order. + + machinesDeleted := []string{} + + // Note: in the test code exceeding machines are deleted in predictable order, so it is easier to write test case and validate rollout sequences. + // e.g. 
if a ms has m1,m2,m3 created in this order, m1 will be deleted first, then m2 and finally m3. + // Note: In case the system has to delete some machines, and there are machines still updating in place, those machines must be deleted first. + // + // This prevents the system from performing unnecessary in-place updates. e.g. + // - In place rollout of MD with 3 Replicas, maxSurge 1, MaxUnavailable 0 + // - First create m4 to create a buffer for doing in place + // - Move old machines (m1, m2, m3) + // - Resulting new MS at this point has 4 replicas m1, m2, m3 (updating in place) and (m4). + // - The system scales down MS, and the system does this getting rid of m3 - the last replica that started in place. + machinesSetMachinesSortedByDeletePriority := sortMachineSetMachinesByDeletionPriorityAndName(scope.machineSetMachines[ms.Name]) + + newMachinesSetMachines := []*clusterv1.Machine{} + for i, m := range machinesSetMachinesSortedByDeletePriority { + if int32(len(machinesDeleted)) >= machinesToDelete { + newMachinesSetMachines = append(newMachinesSetMachines, machinesSetMachinesSortedByDeletePriority[i:]...) + break + } + machinesDeleted = append(machinesDeleted, m.Name) } + scope.machineSetMachines[ms.Name] = newMachinesSetMachines + + // Sort machines to ensure consistent reporting during tests. + sortMachineSetMachinesByName(scope.machineSetMachines[ms.Name]) + + log.Logf("[MS controller] - %s scale down to %d/%[2]d replicas (%s deleted)", ms.Name, ptr.Deref(ms.Spec.Replicas, 0), strings.Join(machinesDeleted, ",")) +} + +func machineSetControllerMutatorUpdateStatus(ms *clusterv1.MachineSet, scope *rolloutScope) { + // This is a simplified version of the code in the updateStatus func from the MachineSet controller. + // Note: the corresponding logic in the MS controller looks at the MachineAvailable condition to + // determine availability. Here we are looking at the UpdateInProgress annotation, which is + // one of the inputs considered by the MachineAvailable condition. - // Update counters - // TODO(in-place): consider if to revisit this as soon as we have more details on how we are going to surface unavailability of machines updating in place. ms.Status.Replicas = ptr.To(int32(len(scope.machineSetMachines[ms.Name]))) availableReplicas := int32(0) for _, m := range scope.machineSetMachines[ms.Name] { @@ -713,8 +785,6 @@ func machineSetControllerMutator(log *fileLogger, ms *clusterv1.MachineSet, scop availableReplicas++ } ms.Status.AvailableReplicas = ptr.To(availableReplicas) - - return nil } type rolloutScope struct { diff --git a/internal/controllers/machinedeployment/machinedeployment_rollout_rollingupdate.go b/internal/controllers/machinedeployment/machinedeployment_rollout_rollingupdate.go index 5990f5e54abd..edfcebece0d9 100644 --- a/internal/controllers/machinedeployment/machinedeployment_rollout_rollingupdate.go +++ b/internal/controllers/machinedeployment/machinedeployment_rollout_rollingupdate.go @@ -160,6 +160,19 @@ func (p *rolloutPlanner) reconcileReplicasPendingAcknowledgeMove(ctx context.Con } } +// reconcileNewMachineSet reconciles the replica number for the new MS. +// Note: In case of scale down this function does not consider the possible impact on availability. +// This is considered acceptable because historically it never led to any problem, but we might revisit this in the future +// because some limitations of this approach are becoming more evident, e.g. 
+// +// when users scale down the MD, the operation might temporarily breach min availability (maxUnavailable) +// +// There are code paths specifically added to prevent this issue becoming more relevant when doing in-place updates; +// e.g. the MS controller will give highest delete priority to Machines still updating in-place, +// which are also unavailable Machines. +// +// Notably, there is also no agreement yet on a different way forward because e.g. limiting scale down of the +// new MS could lead e.g. to completing in place update of Machines that will be otherwise deleted. func (p *rolloutPlanner) reconcileNewMachineSet(ctx context.Context) error { log := ctrl.LoggerFrom(ctx) allMSs := append(p.oldMSs, p.newMS) @@ -401,7 +414,6 @@ func (p *rolloutPlanner) reconcileInPlaceUpdateIntent(ctx context.Context) error } // Check if the MachineSet can update in place; if not, move to the next MachineSet. - // TODO(in-place): replace with a call to the hook canUpdateMachineSetInPlaceFunc := func(_ *clusterv1.MachineSet) bool { return false } if p.overrideCanUpdateMachineSetInPlace != nil { canUpdateMachineSetInPlaceFunc = p.overrideCanUpdateMachineSetInPlace diff --git a/internal/controllers/machineset/machineset_controller.go b/internal/controllers/machineset/machineset_controller.go index 0de38cf2ba79..4c095357bf9c 100644 --- a/internal/controllers/machineset/machineset_controller.go +++ b/internal/controllers/machineset/machineset_controller.go @@ -46,12 +46,14 @@ import ( "sigs.k8s.io/controller-runtime/pkg/handler" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + runtimehooksv1 "sigs.k8s.io/cluster-api/api/runtime/hooks/v1alpha1" "sigs.k8s.io/cluster-api/controllers/clustercache" "sigs.k8s.io/cluster-api/controllers/external" "sigs.k8s.io/cluster-api/controllers/noderefutil" "sigs.k8s.io/cluster-api/internal/contract" "sigs.k8s.io/cluster-api/internal/controllers/machine" "sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil" + "sigs.k8s.io/cluster-api/internal/hooks" topologynames "sigs.k8s.io/cluster-api/internal/topology/names" "sigs.k8s.io/cluster-api/internal/util/ssa" "sigs.k8s.io/cluster-api/util" @@ -109,6 +111,11 @@ type Reconciler struct { // Note: This field is only used for unit tests that use fake client because the fake client does not properly set resourceVersion // on BootstrapConfig/InfraMachine after ssa.Patch and then ssa.RemoveManagedFieldsForLabelsAndAnnotations would fail. disableRemoveManagedFieldsForLabelsAndAnnotations bool + + // Those fields are only used for test purposes. 
+ overrideCreateMachines func(ctx context.Context, s *scope, machinesToAdd int) (ctrl.Result, error) + overrideMoveMachines func(ctx context.Context, s *scope, targetMSName string, machinesToMove int) (ctrl.Result, error) + overrideDeleteMachines func(ctx context.Context, s *scope, machinesToDelete int) (ctrl.Result, error) } func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { @@ -268,6 +275,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (retres ct reconcileNormal := append(alwaysReconcile, wrapErrMachineSetReconcileFunc(r.reconcileUnhealthyMachines, "failed to reconcile unhealthy machines"), wrapErrMachineSetReconcileFunc(r.syncMachines, "failed to sync Machines"), + wrapErrMachineSetReconcileFunc(r.triggerInPlaceUpdate, "failed to trigger in-place update"), wrapErrMachineSetReconcileFunc(r.syncReplicas, "failed to sync replicas"), ) @@ -348,6 +356,157 @@ func patchMachineSet(ctx context.Context, patchHelper *patch.Helper, machineSet return patchHelper.Patch(ctx, machineSet, options...) } +// triggerInPlaceUpdate completes the move started when scaling down and then triggers the in-place update. +// Note: Usually it is the new MS that receives Machines after a move operation. +func (r *Reconciler) triggerInPlaceUpdate(ctx context.Context, s *scope) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx) + + errs := []error{} + machinesTriggeredInPlace := []*clusterv1.Machine{} + for _, machine := range s.machines { + // If a machine is not updating in place, or if the in-place upgrade has already been triggered, no-op + if _, ok := machine.Annotations[clusterv1.UpdateInProgressAnnotation]; !ok || hooks.IsPending(runtimehooksv1.UpdateMachine, machine) { + continue + } + + // If the existing machine is pending acknowledge from the MD controller after a move operation, + // wait until it is possible to drop the PendingAcknowledgeMove annotation. + orig := machine.DeepCopy() + if _, ok := machine.Annotations[clusterv1.PendingAcknowledgeMoveAnnotation]; ok { + // Check if this MachineSet is still accepting machines moved from other MachineSets. + if sourceMSs, ok := s.machineSet.Annotations[clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation]; ok && sourceMSs != "" { + // Get the list of machines acknowledged by the MD controller. + acknowledgedMoveReplicas := sets.Set[string]{} + if replicaNames, ok := s.machineSet.Annotations[clusterv1.AcknowledgedMoveAnnotation]; ok && replicaNames != "" { + acknowledgedMoveReplicas.Insert(strings.Split(replicaNames, ",")...) + } + + // If the current machine is not yet in the list, it is not possible to trigger in-place yet. + if !acknowledgedMoveReplicas.Has(machine.Name) { + continue + } + + // If the current machine is in the list, drop the annotation. + delete(machine.Annotations, clusterv1.PendingAcknowledgeMoveAnnotation) + } else { + // If this MachineSet is no longer accepting machines from other MS (e.g. because of MD spec changes), + // then drop the PendingAcknowledgeMove annotation; this machine will be treated as any other machine and either + // deleted or moved to another MS after completing the in-place upgrade. + delete(machine.Annotations, clusterv1.PendingAcknowledgeMoveAnnotation) + } + } + + // Complete the move operation started by the source MachineSet by updating machine, infraMachine and bootstrapConfig + // to align to the desiredState for the current MachineSet. 
+ if err := r.completeMoveMachine(ctx, s, machine); err != nil { + errs = append(errs, err) + continue + } + + // Note: Once we write PendingHooksAnnotation the Machine controller will start with the in-place update. + hooks.MarkObjectAsPending(machine, runtimehooksv1.UpdateMachine) + + // Note: Intentionally using client.Patch instead of SSA. Otherwise we would + // have to ensure we preserve PendingHooksAnnotation on existing Machines in MachineSet and that would lead to race + // conditions when the Machine controller tries to remove the annotation and MachineSet adds it back. + if err := r.Client.Patch(ctx, machine, client.MergeFrom(orig)); err != nil { + errs = append(errs, errors.Wrapf(err, "failed to start in-place update for Machine %s", klog.KObj(machine))) + continue + } + + machinesTriggeredInPlace = append(machinesTriggeredInPlace, machine) + log.Info("Completed triggering in-place update", "Machine", klog.KObj(machine)) + r.recorder.Event(machine, corev1.EventTypeNormal, "SuccessfulStartInPlaceUpdate", "Machine starting in-place update") + } + + // Wait until the cache observed the Machine with PendingHooksAnnotation to ensure subsequent reconciles + // will observe it as well and won't repeatedly call the logic to trigger in-place update. + if err := r.waitForMachinesInPlaceUpdateStarted(ctx, machinesTriggeredInPlace); err != nil { + errs = append(errs, err) + } + if len(errs) > 0 { + return ctrl.Result{}, kerrors.NewAggregate(errs) + } + return ctrl.Result{}, nil +} + +func (r *Reconciler) completeMoveMachine(ctx context.Context, s *scope, currentMachine *clusterv1.Machine) error { + desiredMachine, err := r.computeDesiredMachine(s.machineSet, currentMachine) + if err != nil { + return errors.Wrap(err, "could not compute desired Machine") + } + // Note: spec.version and spec.failureDomain are not mutated in-place by syncMachines and accordingly + // not updated by r.computeDesiredMachine, so we have to update them here. + // Note: for MachineSets we have to explicitly also set spec.failureDomain (this is a difference from what happens in KCP + // where the field is set only on create and never updated) + desiredMachine.Spec.Version = currentMachine.Spec.Version + desiredMachine.Spec.FailureDomain = currentMachine.Spec.FailureDomain + + // Compute desiredInfraMachine. + currentInfraMachine, err := external.GetObjectFromContractVersionedRef(ctx, r.Client, currentMachine.Spec.InfrastructureRef, currentMachine.Namespace) + if err != nil { + return errors.Wrapf(err, "failed to get InfraMachine %s", klog.KRef(currentMachine.Namespace, currentMachine.Spec.InfrastructureRef.Name)) + } + desiredInfraMachine, err := r.computeDesiredInfraMachine(ctx, s.machineSet, currentMachine, currentInfraMachine) + if err != nil { + return errors.Wrap(err, "could not compute desired InfraMachine") + } + + // Make sure we drop the fields that should be continuously updated by syncMachines using the capi-machineset-metadata field owner + // from the desiredInfraMachine (drop all labels and annotations except clonedFrom). + desiredInfraMachine.SetLabels(nil) + desiredInfraMachine.SetAnnotations(map[string]string{ + clusterv1.TemplateClonedFromNameAnnotation: desiredInfraMachine.GetAnnotations()[clusterv1.TemplateClonedFromNameAnnotation], + clusterv1.TemplateClonedFromGroupKindAnnotation: desiredInfraMachine.GetAnnotations()[clusterv1.TemplateClonedFromGroupKindAnnotation], + // Machine controller waits for this annotation to exist on Machine and related objects before starting the in-place update. 
+ clusterv1.UpdateInProgressAnnotation: "", + }) + + var desiredBootstrapConfig, currentBootstrapConfig *unstructured.Unstructured + if currentMachine.Spec.Bootstrap.ConfigRef.IsDefined() { + // Compute desiredBootstrapConfig. + currentBootstrapConfig, err = external.GetObjectFromContractVersionedRef(ctx, r.Client, currentMachine.Spec.Bootstrap.ConfigRef, currentMachine.Namespace) + if err != nil { + return errors.Wrapf(err, "failed to get BootstrapConfig %s", klog.KRef(currentMachine.Namespace, currentMachine.Spec.Bootstrap.ConfigRef.Name)) + } + + desiredBootstrapConfig, err = r.computeDesiredBootstrapConfig(ctx, s.machineSet, currentMachine, currentBootstrapConfig) + if err != nil { + return errors.Wrap(err, "could not compute desired BootstrapConfig") + } + + // Make sure we drop the fields that should be continuously updated by syncMachines using the capi-machineset-metadata field owner + // from the desiredBootstrapConfig (drop all labels and annotations except clonedFrom). + desiredBootstrapConfig.SetLabels(nil) + desiredBootstrapConfig.SetAnnotations(map[string]string{ + clusterv1.TemplateClonedFromNameAnnotation: desiredBootstrapConfig.GetAnnotations()[clusterv1.TemplateClonedFromNameAnnotation], + clusterv1.TemplateClonedFromGroupKindAnnotation: desiredBootstrapConfig.GetAnnotations()[clusterv1.TemplateClonedFromGroupKindAnnotation], + // Machine controller waits for this annotation to exist on Machine and related objects before starting the in-place update. + clusterv1.UpdateInProgressAnnotation: "", + }) + } + + // Write InfraMachine. + // Note: Let's update InfraMachine first because that is the call that is most likely to fail. + if err := ssa.Patch(ctx, r.Client, machineSetManagerName, desiredInfraMachine); err != nil { + return errors.Wrapf(err, "failed to update InfraMachine %s", klog.KObj(desiredInfraMachine)) + } + + // Write BootstrapConfig. + if desiredMachine.Spec.Bootstrap.ConfigRef.IsDefined() { + if err := ssa.Patch(ctx, r.Client, machineSetManagerName, desiredBootstrapConfig); err != nil { + return errors.Wrapf(err, "failed to update BootstrapConfig %s", klog.KObj(desiredBootstrapConfig)) + } + } + + // Write Machine. + if err := ssa.Patch(ctx, r.Client, machineSetManagerName, desiredMachine); err != nil { + return errors.Wrap(err, "failed to update Machine") + } + + return nil +} + func (r *Reconciler) reconcileMachineSetOwnerAndLabels(_ context.Context, s *scope) (ctrl.Result, error) { machineSet := s.machineSet cluster := s.cluster @@ -492,12 +651,6 @@ func (r *Reconciler) getAndAdoptMachinesForMachineSet(ctx context.Context, s *sc // syncMachines updates Machines, InfrastructureMachine and BootstrapConfig to propagate in-place mutable fields // from the MachineSet. -// Note: It also cleans up managed fields of all Machines so that Machines that were -// created/patched before (< v1.4.0) the controller adopted Server-Side-Apply (SSA) can also work with SSA. -// Note: For InfrastructureMachines and BootstrapConfigs it also drops ownership of "metadata.labels" and -// "metadata.annotations" from "manager" so that "capi-machineset" can own these fields and can work with SSA. -// Otherwise fields would be co-owned by our "old" "manager" and "capi-machineset" and then we would not be -// able to e.g. drop labels and annotations. 
func (r *Reconciler) syncMachines(ctx context.Context, s *scope) (ctrl.Result, error) { machineSet := s.machineSet machines := s.machines @@ -665,7 +818,7 @@ func newMachineUpToDateCondition(s *scope) *metav1.Condition { func (r *Reconciler) syncReplicas(ctx context.Context, s *scope) (ctrl.Result, error) { ms := s.machineSet machines := s.machines - cluster := s.cluster + if !s.getAndAdoptMachinesForMachineSetSucceeded { return ctrl.Result{}, nil } @@ -674,145 +827,322 @@ func (r *Reconciler) syncReplicas(ctx context.Context, s *scope) (ctrl.Result, e if ms.Spec.Replicas == nil { return ctrl.Result{}, errors.Errorf("the Replicas field in Spec for MachineSet %v is nil, this should not be allowed", ms.Name) } - diff := len(machines) - int(*(ms.Spec.Replicas)) + diff := len(machines) - int(ptr.Deref(ms.Spec.Replicas, 0)) switch { case diff < 0: - diff *= -1 - log.Info(fmt.Sprintf("MachineSet is scaling up to %d replicas by creating %d machines", *(ms.Spec.Replicas), diff), "replicas", *(ms.Spec.Replicas), "machineCount", len(machines)) + // If there are not enough Machines, create missing Machines unless Machine creation is disabled. + machinesToAdd := -diff + log.Info(fmt.Sprintf("MachineSet is scaling up to %d replicas by creating %d machines", *(ms.Spec.Replicas), machinesToAdd), "replicas", *(ms.Spec.Replicas), "machineCount", len(machines)) if ms.Annotations != nil { if value, ok := ms.Annotations[clusterv1.DisableMachineCreateAnnotation]; ok && value == "true" { log.Info("Automatic creation of new machines disabled for machine set") return ctrl.Result{}, nil } } + return r.createMachines(ctx, s, machinesToAdd) - preflightCheckErrMessages, err := r.runPreflightChecks(ctx, cluster, ms, "Scale up") - if err != nil || len(preflightCheckErrMessages) > 0 { - if err != nil { - // If err is not nil use that as the preflightCheckErrMessage - preflightCheckErrMessages = append(preflightCheckErrMessages, err.Error()) + case diff > 0: + // if too many replicas, delete or move exceeding machines. + + // If the MachineSet is accepting replicas from other MachineSets (and thus this is the newMS controlled by a MD), + // detect if there are replicas still pending AcknowledgedMove. + // Note: replicas still pending AcknowledgeMove should not be counted when computing the numbers of machines to delete, because those machines are not included in ms.Spec.Replicas yet. + // Without this check, the following logic would try to align the number of replicas to "an incomplete" ms.Spec.Replicas and as a consequence wrongly delete replicas that should be preserved. 
+ notAcknowledgeMoveReplicas := sets.Set[string]{} + if sourceMSs, ok := ms.Annotations[clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation]; ok && sourceMSs != "" { + for _, m := range machines { + if _, ok := m.Annotations[clusterv1.PendingAcknowledgeMoveAnnotation]; !ok { + continue + } + notAcknowledgeMoveReplicas.Insert(m.Name) } + } + if notAcknowledgeMoveReplicas.Len() > 0 { + log.Info(fmt.Sprintf("Replicas %s moved from an old MachineSet still pending acknowledge from MachineDeployment", notAcknowledgeMoveReplicas.UnsortedList())) + } - s.scaleUpPreflightCheckErrMessages = preflightCheckErrMessages - v1beta1conditions.MarkFalse(ms, clusterv1.MachinesCreatedV1Beta1Condition, clusterv1.PreflightCheckFailedV1Beta1Reason, clusterv1.ConditionSeverityError, "%s", strings.Join(preflightCheckErrMessages, "; ")) - if err != nil { - return ctrl.Result{}, err - } - return ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, nil + machinesToDeleteOrMove := len(machines) - notAcknowledgeMoveReplicas.Len() - int(ptr.Deref(ms.Spec.Replicas, 0)) + if machinesToDeleteOrMove == 0 { + return ctrl.Result{}, nil + } + + // Move machines to the target MachineSet if the current MachineSet is instructed to do so. + if targetMSName, ok := ms.Annotations[clusterv1.MachineSetMoveMachinesToMachineSetAnnotation]; ok && targetMSName != "" { + // Note: The number of machines actually moved could be less than expected e.g. because some machine still updating in-place from a previous move. + return r.startMoveMachines(ctx, s, targetMSName, machinesToDeleteOrMove) + } + + // Otherwise the current MachineSet is not instructed to move machines to another MachineSet, + // then delete all the exceeding machines. + return r.deleteMachines(ctx, s, machinesToDeleteOrMove) + } + + return ctrl.Result{}, nil +} + +func (r *Reconciler) createMachines(ctx context.Context, s *scope, machinesToAdd int) (ctrl.Result, error) { + if r.overrideCreateMachines != nil { + return r.overrideCreateMachines(ctx, s, machinesToAdd) + } + + log := ctrl.LoggerFrom(ctx) + ms := s.machineSet + cluster := s.cluster + + preflightCheckErrMessages, err := r.runPreflightChecks(ctx, cluster, ms, "Scale up") + if err != nil || len(preflightCheckErrMessages) > 0 { + if err != nil { + // If err is not nil use that as the preflightCheckErrMessage + preflightCheckErrMessages = append(preflightCheckErrMessages, err.Error()) + } + + s.scaleUpPreflightCheckErrMessages = preflightCheckErrMessages + v1beta1conditions.MarkFalse(ms, clusterv1.MachinesCreatedV1Beta1Condition, clusterv1.PreflightCheckFailedV1Beta1Reason, clusterv1.ConditionSeverityError, "%s", strings.Join(preflightCheckErrMessages, "; ")) + if err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, nil + } + + machinesAdded := []*clusterv1.Machine{} + for i := range machinesToAdd { + // Create a new logger so the global logger is not modified. 
+ log := log + machine, computeMachineErr := r.computeDesiredMachine(ms, nil) + if computeMachineErr != nil { + v1beta1conditions.MarkFalse(ms, clusterv1.MachinesCreatedV1Beta1Condition, clusterv1.MachineCreationFailedV1Beta1Reason, + clusterv1.ConditionSeverityError, "%s", computeMachineErr.Error()) + return ctrl.Result{}, errors.Wrap(computeMachineErr, "failed to create Machine: failed to compute desired Machine") } var ( - machineList []*clusterv1.Machine - errs []error + infraRef, bootstrapRef clusterv1.ContractVersionedObjectReference + infraMachine, bootstrapConfig *unstructured.Unstructured ) - for i := range diff { - // Create a new logger so the global logger is not modified. - log := log - machine, computeMachineErr := r.computeDesiredMachine(ms, nil) - if computeMachineErr != nil { - return ctrl.Result{}, errors.Wrap(computeMachineErr, "failed to create Machine: failed to compute desired Machine") + // Create the BootstrapConfig if necessary. + if ms.Spec.Template.Spec.Bootstrap.ConfigRef.IsDefined() { + bootstrapConfig, bootstrapRef, err = r.createBootstrapConfig(ctx, ms, machine) + if err != nil { + v1beta1conditions.MarkFalse(ms, clusterv1.MachinesCreatedV1Beta1Condition, clusterv1.BootstrapTemplateCloningFailedV1Beta1Reason, clusterv1.ConditionSeverityError, "%s", err.Error()) + return ctrl.Result{}, errors.Wrapf(err, "failed to clone bootstrap configuration from %s %s while creating a Machine", + ms.Spec.Template.Spec.Bootstrap.ConfigRef.Kind, + klog.KRef(ms.Namespace, ms.Spec.Template.Spec.Bootstrap.ConfigRef.Name)) } - // Clone and set the infrastructure and bootstrap references. - var ( - infraRef, bootstrapRef clusterv1.ContractVersionedObjectReference - infraMachine, bootstrapConfig *unstructured.Unstructured - ) - - // Create the BootstrapConfig if necessary. - if ms.Spec.Template.Spec.Bootstrap.ConfigRef.IsDefined() { - bootstrapConfig, bootstrapRef, err = r.createBootstrapConfig(ctx, ms, machine) - if err != nil { - v1beta1conditions.MarkFalse(ms, clusterv1.MachinesCreatedV1Beta1Condition, clusterv1.BootstrapTemplateCloningFailedV1Beta1Reason, clusterv1.ConditionSeverityError, "%s", err.Error()) - return ctrl.Result{}, errors.Wrapf(err, "failed to clone bootstrap configuration from %s %s while creating a Machine", - ms.Spec.Template.Spec.Bootstrap.ConfigRef.Kind, - klog.KRef(ms.Namespace, ms.Spec.Template.Spec.Bootstrap.ConfigRef.Name)) + machine.Spec.Bootstrap.ConfigRef = bootstrapRef + log = log.WithValues(bootstrapRef.Kind, klog.KRef(ms.Namespace, bootstrapRef.Name)) + } + + // Create the InfraMachine. + infraMachine, infraRef, err = r.createInfraMachine(ctx, ms, machine) + if err != nil { + var deleteErr error + if bootstrapRef.IsDefined() { + // Cleanup the bootstrap resource if we can't create the InfraMachine; or we might risk to leak it. + if err := r.Client.Delete(ctx, bootstrapConfig); err != nil && !apierrors.IsNotFound(err) { + deleteErr = errors.Wrapf(err, "failed to cleanup %s %s after %s creation failed", bootstrapRef.Kind, klog.KRef(ms.Namespace, bootstrapRef.Name), ms.Spec.Template.Spec.InfrastructureRef.Kind) } - machine.Spec.Bootstrap.ConfigRef = bootstrapRef - log = log.WithValues(bootstrapRef.Kind, klog.KRef(ms.Namespace, bootstrapRef.Name)) } - // Create the InfraMachine. - infraMachine, infraRef, err = r.createInfraMachine(ctx, ms, machine) - if err != nil { - var deleteErr error - if bootstrapRef.IsDefined() { - // Cleanup the bootstrap resource if we can't create the InfraMachine; or we might risk to leak it. 
- if err := r.Client.Delete(ctx, bootstrapConfig); err != nil && !apierrors.IsNotFound(err) { - deleteErr = errors.Wrapf(err, "failed to cleanup %s %s after %s creation failed", bootstrapRef.Kind, klog.KRef(ms.Namespace, bootstrapRef.Name), ms.Spec.Template.Spec.InfrastructureRef.Kind) - } + v1beta1conditions.MarkFalse(ms, clusterv1.MachinesCreatedV1Beta1Condition, clusterv1.InfrastructureTemplateCloningFailedV1Beta1Reason, clusterv1.ConditionSeverityError, "%s", err.Error()) + return ctrl.Result{}, kerrors.NewAggregate([]error{errors.Wrapf(err, "failed to clone infrastructure machine from %s %s while creating a Machine", + ms.Spec.Template.Spec.InfrastructureRef.Kind, + klog.KRef(ms.Namespace, ms.Spec.Template.Spec.InfrastructureRef.Name)), deleteErr}) + } + log = log.WithValues(infraRef.Kind, klog.KRef(ms.Namespace, infraRef.Name)) + machine.Spec.InfrastructureRef = infraRef + + // Create the Machine. + if err := ssa.Patch(ctx, r.Client, machineSetManagerName, machine); err != nil { + // Try to cleanup the external objects if the Machine creation failed. + errs := []error{err} + if err := r.Client.Delete(ctx, infraMachine); !apierrors.IsNotFound(err) { + errs = append(errs, errors.Wrapf(err, "failed to cleanup %s %s after Machine creation failed", infraRef.Kind, klog.KRef(ms.Namespace, infraRef.Name))) + } + if bootstrapRef.IsDefined() { + if err := r.Client.Delete(ctx, bootstrapConfig); !apierrors.IsNotFound(err) { + errs = append(errs, errors.Wrapf(err, "failed to cleanup %s %s after Machine creation failed", bootstrapRef.Kind, klog.KRef(ms.Namespace, bootstrapRef.Name))) } - v1beta1conditions.MarkFalse(ms, clusterv1.MachinesCreatedV1Beta1Condition, clusterv1.InfrastructureTemplateCloningFailedV1Beta1Reason, clusterv1.ConditionSeverityError, "%s", err.Error()) - return ctrl.Result{}, kerrors.NewAggregate([]error{errors.Wrapf(err, "failed to clone infrastructure machine from %s %s while creating a Machine", - ms.Spec.Template.Spec.InfrastructureRef.Kind, - klog.KRef(ms.Namespace, ms.Spec.Template.Spec.InfrastructureRef.Name)), deleteErr}) } - log = log.WithValues(infraRef.Kind, klog.KRef(ms.Namespace, infraRef.Name)) - machine.Spec.InfrastructureRef = infraRef - // Create the Machine. - if err := ssa.Patch(ctx, r.Client, machineSetManagerName, machine); err != nil { - log.Error(err, "Error while creating a machine") - r.recorder.Eventf(ms, corev1.EventTypeWarning, "FailedCreate", "Failed to create machine: %v", err) - errs = append(errs, err) - v1beta1conditions.MarkFalse(ms, clusterv1.MachinesCreatedV1Beta1Condition, clusterv1.MachineCreationFailedV1Beta1Reason, - clusterv1.ConditionSeverityError, "%s", err.Error()) + v1beta1conditions.MarkFalse(ms, clusterv1.MachinesCreatedV1Beta1Condition, clusterv1.MachineCreationFailedV1Beta1Reason, + clusterv1.ConditionSeverityError, "%s", err.Error()) + return ctrl.Result{}, kerrors.NewAggregate(errs) + } - // Try to cleanup the external objects if the Machine creation failed. 
- if err := r.Client.Delete(ctx, infraMachine); !apierrors.IsNotFound(err) {
- log.Error(err, "Failed to cleanup infrastructure machine object after Machine creation error", infraRef.Kind, klog.KRef(ms.Namespace, infraRef.Name))
- }
- if bootstrapRef.IsDefined() {
- if err := r.Client.Delete(ctx, bootstrapConfig); !apierrors.IsNotFound(err) {
- log.Error(err, "Failed to cleanup bootstrap configuration object after Machine creation error", bootstrapRef.Kind, klog.KRef(ms.Namespace, bootstrapRef.Name))
- }
- }
+ machinesAdded = append(machinesAdded, machine)
+ log.Info(fmt.Sprintf("Machine created (scale up, creating %d of %d)", i+1, machinesToAdd), "Machine", klog.KObj(machine))
+ r.recorder.Eventf(ms, corev1.EventTypeNormal, "SuccessfulCreate", "Created Machine %q", machine.Name)
+ }
+
+ // Wait for cache update to ensure following reconcile gets latest change.
+ return ctrl.Result{}, r.waitForMachinesCreation(ctx, machinesAdded)
+}
+
+func (r *Reconciler) deleteMachines(ctx context.Context, s *scope, machinesToDelete int) (ctrl.Result, error) {
+ if r.overrideDeleteMachines != nil {
+ return r.overrideDeleteMachines(ctx, s, machinesToDelete)
+ }
+
+ log := ctrl.LoggerFrom(ctx)
+ ms := s.machineSet
+ machines := s.machines
+
+ log.Info(fmt.Sprintf("MachineSet is scaling down to %d replicas by deleting %d Machines", *(ms.Spec.Replicas), machinesToDelete), "replicas", *(ms.Spec.Replicas), "machineCount", len(machines), "order", cmp.Or(ms.Spec.Deletion.Order, clusterv1.RandomMachineSetDeletionOrder))
+
+ // Pick the list of machines to be deleted according to the criteria defined in ms.Spec.Deletion.Order.
+ // Note: If the system has to delete some Machines while other Machines are still updating in place, those Machines must be deleted first.
+
+ // This prevents the system from performing unnecessary in-place updates. e.g.
+ // - In-place rollout of a MD with 3 replicas, maxSurge 1, maxUnavailable 0
+ // - First create m4 to create a buffer for doing in-place updates
+ // - Move old machines (m1, m2, m3)
+ // - The resulting new MS at this point has 4 replicas: m1, m2, m3 (updating in place) and m4.
+ // - The system scales down the MS, and it does this by getting rid of m3, the last replica that started updating in place.
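+ // - Deleting m3 first avoids completing an in-place update for a Machine that is going to be removed anyway.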
+ deletePriorityFunc, err := getDeletePriorityFunc(ms)
+ if err != nil {
+ return ctrl.Result{}, err
+ }
+ machinesToDeleteByPriority := getMachinesToDeletePrioritized(machines, machinesToDelete, deletePriorityFunc)
+
+ var errs []error
+ machinesDeleted := []*clusterv1.Machine{}
+ for i, machine := range machinesToDeleteByPriority {
+ log := log.WithValues("Machine", klog.KObj(machine))
+ if machine.GetDeletionTimestamp().IsZero() {
+ if err := r.Client.Delete(ctx, machine); err != nil {
+ errs = append(errs, err)
 continue
 }
- log.Info(fmt.Sprintf("Machine created (scale up, creating %d of %d)", i+1, diff), "Machine", klog.KObj(machine))
- r.recorder.Eventf(ms, corev1.EventTypeNormal, "SuccessfulCreate", "Created machine %q", machine.Name)
- machineList = append(machineList, machine)
+ machinesDeleted = append(machinesDeleted, machine)
+ log.Info(fmt.Sprintf("Deleting Machine (scale down, deleting %d of %d)", i+1, machinesToDelete))
+ r.recorder.Eventf(ms, corev1.EventTypeNormal, "SuccessfulDelete", "Deleted Machine %q", machine.Name)
+ } else {
+ log.Info(fmt.Sprintf("Waiting for Machine to be deleted (scale down, deleting %d of %d)", i+1, machinesToDelete))
+ }
+ }

- if len(errs) > 0 {
- return ctrl.Result{}, kerrors.NewAggregate(errs)
+ // Wait for cache update to ensure following reconcile gets latest change.
+ if err := r.waitForMachinesDeletion(ctx, machinesDeleted); err != nil {
+ errs = append(errs, err)
+ }
+ if len(errs) > 0 {
+ return ctrl.Result{}, kerrors.NewAggregate(errs)
+ }
+ return ctrl.Result{}, nil
+}
+
+func (r *Reconciler) startMoveMachines(ctx context.Context, s *scope, targetMSName string, machinesToMove int) (ctrl.Result, error) {
+ if r.overrideMoveMachines != nil {
+ return r.overrideMoveMachines(ctx, s, targetMSName, machinesToMove)
+ }
+
+ log := ctrl.LoggerFrom(ctx)
+ ms := s.machineSet
+ machines := s.machines
+
+ // Check that everything is set for the move operation by validating that the target MS is expecting to
+ // receive replicas from the current one.
+ targetMS := &clusterv1.MachineSet{}
+ if err := r.Client.Get(ctx, client.ObjectKey{Name: targetMSName, Namespace: ms.Namespace}, targetMS); err != nil {
+ return ctrl.Result{}, errors.Wrapf(err, "failed to get MachineSet %s, which is the target for the move operation", targetMSName)
+ }
+
+ validSourceMSs := targetMS.Annotations[clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation]
+ sourcesSet := sets.Set[string]{}
+ sourcesSet.Insert(strings.Split(validSourceMSs, ",")...)
+ if !sourcesSet.Has(ms.Name) {
+ return ctrl.Result{}, errors.Errorf("MachineSet %s is set to move replicas to %s, but %[2]s only accepts Machines from %s", ms.Name, targetMS.Name, validSourceMSs)
+ }
+
+ log.Info(fmt.Sprintf("MachineSet is scaling down to %d replicas by moving %d Machines to %s", *(ms.Spec.Replicas), machinesToMove, targetMSName), "replicas", *(ms.Spec.Replicas), "machineCount", len(machines), "order", cmp.Or(ms.Spec.Deletion.Order, clusterv1.RandomMachineSetDeletionOrder))
+
+ // Sort to move Machines in a deterministic and predictable order.
+ // Note: For convenience we sort Machines using the ordering criteria defined in ms.Spec.Deletion.Order.
+ deletePriorityFunc, err := getDeletePriorityFunc(ms)
+ if err != nil {
+ return ctrl.Result{}, err
+ }
+ machinesToMoveByPriority := getMachinesToMovePrioritized(machines, deletePriorityFunc)
+
+ errs := []error{}
+ machinesMoved := []*clusterv1.Machine{}
+ for _, machine := range machinesToMoveByPriority {
+ if machinesToMove <= 0 {
+ break
 }
- return ctrl.Result{}, r.waitForMachineCreation(ctx, machineList)
- case diff > 0:
- log.Info(fmt.Sprintf("MachineSet is scaling down to %d replicas by deleting %d machines", *(ms.Spec.Replicas), diff), "replicas", *(ms.Spec.Replicas), "machineCount", len(machines), "order", cmp.Or(ms.Spec.Deletion.Order, clusterv1.RandomMachineSetDeletionOrder))
- deletePriorityFunc, err := getDeletePriorityFunc(ms)
- if err != nil {
- return ctrl.Result{}, err
+ log := log.WithValues("Machine", klog.KObj(machine))
+
+ // Make sure we are not moving machines still updating in place from a previous move (this also includes machines still pending AcknowledgeMove).
+ if _, ok := machine.Annotations[clusterv1.UpdateInProgressAnnotation]; ok || hooks.IsPending(runtimehooksv1.UpdateMachine, machine) {
+ continue
 }
- var errs []error
- machinesToDelete := getMachinesToDeletePrioritized(machines, diff, deletePriorityFunc)
- for i, machine := range machinesToDelete {
- log := log.WithValues("Machine", klog.KObj(machine))
- if machine.GetDeletionTimestamp().IsZero() {
- if err := r.Client.Delete(ctx, machine); err != nil {
- log.Error(err, "Unable to delete Machine")
- r.recorder.Eventf(ms, corev1.EventTypeWarning, "FailedDelete", "Failed to delete machine %q: %v", machine.Name, err)
- errs = append(errs, err)
- continue
- }
- // Note: We intentionally log after Delete because we want this log line to show up only after DeletionTimestamp has been set.
- // Also, setting DeletionTimestamp doesn't mean the Machine is actually deleted (deletion takes some time).
- log.Info(fmt.Sprintf("Deleting Machine (scale down, deleting %d of %d)", i+1, diff))
- r.recorder.Eventf(ms, corev1.EventTypeNormal, "SuccessfulDelete", "Deleted machine %q", machine.Name)
- } else {
- log.Info(fmt.Sprintf("Waiting for Machine to be deleted (scale down, deleting %d of %d)", i+1, diff))
- }
+ // Note: Machines with the DeleteMachineAnnotation are going to be moved, and the new MS
+ // will take care of fulfilling this intent as soon as it scales down.
+ // Note: Machines marked as unhealthy by MHC are also going to be moved, because otherwise
+ // remediation will not complete as the Machine is owned by an old MS.
+
+ machinesToMove--
+
+ // If the machine is already deleting, wait for it to go away before further scaling down with move.
+ if !machine.DeletionTimestamp.IsZero() {
+ continue
 }
- if len(errs) > 0 {
- return ctrl.Result{}, kerrors.NewAggregate(errs)
+ // Perform the first part of a move operation, the part under the responsibility of the old MS.
+ orig := machine.DeepCopy()
+
+ // Change the ownerReference from the current MS to the target MS.
+ // Note: After the move, when the first reconcile of the target MS happens, there will be co-ownership on
+ // this ownerReference between "manager" and "capi-machineset", but this is not an issue because the MS controller will never unset this field.
+ machine.OwnerReferences = util.ReplaceOwnerRef(machine.OwnerReferences, ms, metav1.OwnerReference{
+ APIVersion: clusterv1.GroupVersion.String(),
+ Kind: "MachineSet",
+ Name: targetMS.Name,
+ UID: targetMS.UID,
+ Controller: ptr.To(true),
+ })
+
+ // Change the machine's labels from the current MS to the target MS.
+ // Note: The implementation assumes that the current and the target MS originate from the same MachineDeployment,
+ // and thus this change can be performed in a surgical way by patching only the MachineDeploymentUniqueLabel.
+ // Notably, minimizing label changes simplifies impacts and considerations about managedFields and co-ownership.
+ // Note: After the move, when the first reconcile of the target MS happens, there will be co-ownership on
+ // this label between "manager" and "capi-machineset", but this is not an issue because the MS controller will never unset this label.
+ targetUniqueLabel, ok := targetMS.Labels[clusterv1.MachineDeploymentUniqueLabel]
+ if !ok {
+ return ctrl.Result{}, errors.Errorf("MachineSet %s does not have the %s label", targetMS.Name, clusterv1.MachineDeploymentUniqueLabel)
+ }
+ machine.Labels[clusterv1.MachineDeploymentUniqueLabel] = targetUniqueLabel
+
+ // Apply the UpdateInProgress and PendingAcknowledgeMove annotations.
+ if machine.Annotations == nil {
+ machine.Annotations = map[string]string{}
+ }
+ machine.Annotations[clusterv1.UpdateInProgressAnnotation] = ""
+ machine.Annotations[clusterv1.PendingAcknowledgeMoveAnnotation] = ""
+
+ // Note: Intentionally using client.Patch instead of SSA. Otherwise we would have to ensure we preserve
+ // UpdateInProgressAnnotation on existing Machines and that would lead to race conditions when
+ // the Machine controller tries to remove the annotation and then the MachineSet controller adds it back.
+ if err := r.Client.Patch(ctx, machine, client.MergeFrom(orig)); err != nil {
+ log.Error(err, "Failed to start moving Machine")
+ errs = append(errs, errors.Wrapf(err, "failed to start moving Machine %s", klog.KObj(machine)))
+ continue
 }
- return ctrl.Result{}, r.waitForMachineDeletion(ctx, machinesToDelete)
+ machinesMoved = append(machinesMoved, machine)
 }
+ // Wait for cache update to ensure following reconcile gets latest change.
+ if err := r.waitForMachinesStartedMove(ctx, machinesMoved); err != nil {
+ errs = append(errs, err)
+ }
+ if len(errs) > 0 {
+ return ctrl.Result{}, kerrors.NewAggregate(errs)
+ }
 return ctrl.Result{}, nil
 }

@@ -884,8 +1214,11 @@ func (r *Reconciler) computeDesiredMachine(machineSet *clusterv1.MachineSet, exi
 }
 desiredMachine.Finalizers = finalizers
+ // Make sure syncMachines does not change fields that are not in-place mutable.
 desiredMachine.Spec.Bootstrap.ConfigRef = existingMachine.Spec.Bootstrap.ConfigRef
 desiredMachine.Spec.InfrastructureRef = existingMachine.Spec.InfrastructureRef
+ desiredMachine.Spec.Version = existingMachine.Spec.Version
+ desiredMachine.Spec.FailureDomain = existingMachine.Spec.FailureDomain
 }
 // Set the in-place mutable fields.
 // When we create a new Machine we will just create the Machine with those fields.
@@ -985,12 +1318,9 @@ func (r *Reconciler) adoptOrphan(ctx context.Context, machineSet *clusterv1.Mach return r.Client.Patch(ctx, machine, patch) } -func (r *Reconciler) waitForMachineCreation(ctx context.Context, machineList []*clusterv1.Machine) error { - log := ctrl.LoggerFrom(ctx) - - for i := range machineList { - machine := machineList[i] - pollErr := wait.PollUntilContextTimeout(ctx, stateConfirmationInterval, stateConfirmationTimeout, true, func(ctx context.Context) (bool, error) { +func (r *Reconciler) waitForMachinesCreation(ctx context.Context, machines []*clusterv1.Machine) error { + pollErr := wait.PollUntilContextTimeout(ctx, stateConfirmationInterval, stateConfirmationTimeout, true, func(ctx context.Context) (bool, error) { + for _, machine := range machines { key := client.ObjectKey{Namespace: machine.Namespace, Name: machine.Name} if err := r.Client.Get(ctx, key, &clusterv1.Machine{}); err != nil { if apierrors.IsNotFound(err) { @@ -998,38 +1328,78 @@ func (r *Reconciler) waitForMachineCreation(ctx context.Context, machineList []* } return false, err } + } + return true, nil + }) - return true, nil - }) + if pollErr != nil { + return errors.Wrap(pollErr, "failed waiting for Machines to be created") + } + return nil +} - if pollErr != nil { - log.Error(pollErr, "Failed waiting for machine object to be created") - return errors.Wrap(pollErr, "failed waiting for machine object to be created") +func (r *Reconciler) waitForMachinesDeletion(ctx context.Context, machines []*clusterv1.Machine) error { + pollErr := wait.PollUntilContextTimeout(ctx, stateConfirmationInterval, stateConfirmationTimeout, true, func(ctx context.Context) (bool, error) { + for _, machine := range machines { + m := &clusterv1.Machine{} + key := client.ObjectKey{Namespace: machine.Namespace, Name: machine.Name} + if err := r.Client.Get(ctx, key, m); err != nil { + if apierrors.IsNotFound(err) { + continue + } + return false, err + } + if m.DeletionTimestamp.IsZero() { + return false, nil + } } - } + return true, nil + }) + if pollErr != nil { + return errors.Wrap(pollErr, "failed waiting for Machines to be deleted") + } return nil } -func (r *Reconciler) waitForMachineDeletion(ctx context.Context, machineList []*clusterv1.Machine) error { - log := ctrl.LoggerFrom(ctx) - - for i := range machineList { - machine := machineList[i] - pollErr := wait.PollUntilContextTimeout(ctx, stateConfirmationInterval, stateConfirmationTimeout, true, func(ctx context.Context) (bool, error) { +func (r *Reconciler) waitForMachinesStartedMove(ctx context.Context, machines []*clusterv1.Machine) error { + pollErr := wait.PollUntilContextTimeout(ctx, stateConfirmationInterval, stateConfirmationTimeout, true, func(ctx context.Context) (bool, error) { + for _, machine := range machines { m := &clusterv1.Machine{} key := client.ObjectKey{Namespace: machine.Namespace, Name: machine.Name} - err := r.Client.Get(ctx, key, m) - if apierrors.IsNotFound(err) || !m.DeletionTimestamp.IsZero() { - return true, nil + if err := r.Client.Get(ctx, key, m); err != nil { + return false, err } - return false, err - }) + if _, annotationSet := m.Annotations[clusterv1.UpdateInProgressAnnotation]; !annotationSet { + return false, nil + } + } + return true, nil + }) - if pollErr != nil { - log.Error(pollErr, "Failed waiting for machine object to be deleted") - return errors.Wrap(pollErr, "failed waiting for machine object to be deleted") + if pollErr != nil { + return errors.Wrap(pollErr, "failed waiting for Machines to start move") + } + return nil +} + 
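+// waitForMachinesInPlaceUpdateStarted waits until the cache observes the UpdateMachine hook pending on all given Machines,
+// so that subsequent reconciles see the in-place update as started.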
+func (r *Reconciler) waitForMachinesInPlaceUpdateStarted(ctx context.Context, machines []*clusterv1.Machine) error {
+ pollErr := wait.PollUntilContextTimeout(ctx, stateConfirmationInterval, stateConfirmationTimeout, true, func(ctx context.Context) (bool, error) {
+ for _, machine := range machines {
+ m := &clusterv1.Machine{}
+ key := client.ObjectKey{Namespace: machine.Namespace, Name: machine.Name}
+ if err := r.Client.Get(ctx, key, m); err != nil {
+ return false, err
+ }
+ if !hooks.IsPending(runtimehooksv1.UpdateMachine, m) {
+ return false, nil
+ }
 }
+ return true, nil
+ })
+
+ if pollErr != nil {
+ return errors.Wrap(pollErr, "failed waiting for Machines to start in-place update")
 }
 return nil
 }
@@ -1553,7 +1923,7 @@ func (r *Reconciler) reconcileExternalTemplateReference(ctx context.Context, clu
 }

 func (r *Reconciler) createBootstrapConfig(ctx context.Context, ms *clusterv1.MachineSet, machine *clusterv1.Machine) (*unstructured.Unstructured, clusterv1.ContractVersionedObjectReference, error) {
- bootstrapConfig, err := r.computeDesiredBootstrapConfig(ctx, ms, machine)
+ bootstrapConfig, err := r.computeDesiredBootstrapConfig(ctx, ms, machine, nil)
 if err != nil {
 return nil, clusterv1.ContractVersionedObjectReference{}, errors.Wrapf(err, "failed to create BootstrapConfig")
 }
@@ -1582,12 +1952,15 @@ func (r *Reconciler) createBootstrapConfig(ctx context.Context, ms *clusterv1.Ma
 }, nil
 }

-func (r *Reconciler) computeDesiredBootstrapConfig(ctx context.Context, ms *clusterv1.MachineSet, machine *clusterv1.Machine) (*unstructured.Unstructured, error) {
- ownerReference := &metav1.OwnerReference{
- APIVersion: clusterv1.GroupVersion.String(),
- Kind: "MachineSet",
- Name: ms.Name,
- UID: ms.UID,
+func (r *Reconciler) computeDesiredBootstrapConfig(ctx context.Context, ms *clusterv1.MachineSet, machine *clusterv1.Machine, existingBootstrapConfig *unstructured.Unstructured) (*unstructured.Unstructured, error) {
+ var ownerReference *metav1.OwnerReference
+ if existingBootstrapConfig == nil || !util.HasOwner(existingBootstrapConfig.GetOwnerReferences(), clusterv1.GroupVersion.String(), []string{"Machine"}) {
+ ownerReference = &metav1.OwnerReference{
+ APIVersion: clusterv1.GroupVersion.String(),
+ Kind: "MachineSet",
+ Name: ms.Name,
+ UID: ms.UID,
+ }
 }

 apiVersion, err := contract.GetAPIVersion(ctx, r.Client, ms.Spec.Template.Spec.Bootstrap.ConfigRef.GroupKind())
@@ -1619,11 +1992,15 @@ func (r *Reconciler) computeDesiredBootstrapConfig(ctx context.Context, ms *clus
 if err != nil {
 return nil, errors.Wrap(err, "failed to compute desired BootstrapConfig")
 }
+
+ if existingBootstrapConfig != nil {
+ bootstrapConfig.SetUID(existingBootstrapConfig.GetUID())
+ }
 return bootstrapConfig, nil
 }

 func (r *Reconciler) createInfraMachine(ctx context.Context, ms *clusterv1.MachineSet, machine *clusterv1.Machine) (*unstructured.Unstructured, clusterv1.ContractVersionedObjectReference, error) {
- infraMachine, err := r.computeDesiredInfraMachine(ctx, ms, machine)
+ infraMachine, err := r.computeDesiredInfraMachine(ctx, ms, machine, nil)
 if err != nil {
 return nil, clusterv1.ContractVersionedObjectReference{}, errors.Wrapf(err, "failed to create InfraMachine")
 }
@@ -1652,12 +2029,15 @@ func (r *Reconciler) createInfraMachine(ctx context.Context, ms *clusterv1.Machi
 }, nil
 }

-func (r *Reconciler) computeDesiredInfraMachine(ctx context.Context, ms *clusterv1.MachineSet, machine *clusterv1.Machine) (*unstructured.Unstructured, error) {
- ownerReference := &metav1.OwnerReference{
- APIVersion: 
clusterv1.GroupVersion.String(), - Kind: "MachineSet", - Name: ms.Name, - UID: ms.UID, +func (r *Reconciler) computeDesiredInfraMachine(ctx context.Context, ms *clusterv1.MachineSet, machine *clusterv1.Machine, existingInfraMachine *unstructured.Unstructured) (*unstructured.Unstructured, error) { + var ownerReference *metav1.OwnerReference + if existingInfraMachine == nil || !util.HasOwner(existingInfraMachine.GetOwnerReferences(), clusterv1.GroupVersion.String(), []string{"Machine"}) { + ownerReference = &metav1.OwnerReference{ + APIVersion: clusterv1.GroupVersion.String(), + Kind: "MachineSet", + Name: ms.Name, + UID: ms.UID, + } } apiVersion, err := contract.GetAPIVersion(ctx, r.Client, ms.Spec.Template.Spec.InfrastructureRef.GroupKind()) @@ -1689,6 +2069,10 @@ func (r *Reconciler) computeDesiredInfraMachine(ctx context.Context, ms *cluster if err != nil { return nil, errors.Wrap(err, "failed to compute desired InfraMachine") } + + if existingInfraMachine != nil { + infraMachine.SetUID(existingInfraMachine.GetUID()) + } return infraMachine, nil } diff --git a/internal/controllers/machineset/machineset_controller_status_test.go b/internal/controllers/machineset/machineset_controller_status_test.go index c4edcd75bc25..6b5486e93b21 100644 --- a/internal/controllers/machineset/machineset_controller_status_test.go +++ b/internal/controllers/machineset/machineset_controller_status_test.go @@ -1140,12 +1140,31 @@ func fakeMachine(name string, options ...fakeMachinesOption) *clusterv1.Machine return p } +func withOwnerMachineSet(msName string) fakeMachinesOption { + return func(m *clusterv1.Machine) { + m.OwnerReferences = []metav1.OwnerReference{ + { + APIVersion: clusterv1.GroupVersion.String(), + Kind: "MachineSet", + Name: msName, + Controller: ptr.To(true), + }, + } + } +} + func withCreationTimestamp(t time.Time) fakeMachinesOption { return func(m *clusterv1.Machine) { m.CreationTimestamp = metav1.Time{Time: t} } } +func withMachineFinalizer() fakeMachinesOption { + return func(m *clusterv1.Machine) { + m.Finalizers = []string{clusterv1.MachineFinalizer} + } +} + func withDeletionTimestamp() fakeMachinesOption { return func(m *clusterv1.Machine) { m.DeletionTimestamp = ptr.To(metav1.Time{Time: time.Now()}) @@ -1158,6 +1177,18 @@ func withStaleDeletionTimestamp() fakeMachinesOption { } } +func withMachineLabels(labels map[string]string) fakeMachinesOption { + return func(m *clusterv1.Machine) { + m.Labels = labels + } +} + +func withMachineAnnotations(annotations map[string]string) fakeMachinesOption { + return func(m *clusterv1.Machine) { + m.Annotations = annotations + } +} + func withStaleDrain() fakeMachinesOption { return func(m *clusterv1.Machine) { if m.Status.Deletion == nil { @@ -1172,3 +1203,18 @@ func withCondition(c metav1.Condition) fakeMachinesOption { conditions.Set(m, c) } } + +func withHealthyNode() fakeMachinesOption { + // Note: This is what is required by delete priority functions to consider the machine healthy. 
+ return func(m *clusterv1.Machine) { + m.Status = clusterv1.MachineStatus{ + NodeRef: clusterv1.MachineNodeReference{Name: "some-node"}, + Conditions: []metav1.Condition{ + { + Type: clusterv1.MachineNodeHealthyCondition, + Status: metav1.ConditionTrue, + }, + }, + } + } +} diff --git a/internal/controllers/machineset/machineset_controller_test.go b/internal/controllers/machineset/machineset_controller_test.go index 99154f3414a8..5235c1176d9f 100644 --- a/internal/controllers/machineset/machineset_controller_test.go +++ b/internal/controllers/machineset/machineset_controller_test.go @@ -38,6 +38,7 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/record" "k8s.io/utils/ptr" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/client/interceptor" @@ -45,6 +46,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + runtimev1 "sigs.k8s.io/cluster-api/api/runtime/v1beta2" "sigs.k8s.io/cluster-api/controllers/external" "sigs.k8s.io/cluster-api/internal/contract" "sigs.k8s.io/cluster-api/internal/util/ssa" @@ -999,8 +1001,10 @@ func TestAdoptOrphan(t *testing.T) { } } -func newMachineSet(name, cluster string, replicas int32) *clusterv1.MachineSet { - return &clusterv1.MachineSet{ +type newMachineSetOption func(m *clusterv1.MachineSet) + +func newMachineSet(name, cluster string, replicas int32, opts ...newMachineSetOption) *clusterv1.MachineSet { + ms := &clusterv1.MachineSet{ ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: metav1.NamespaceDefault, @@ -1034,6 +1038,28 @@ func newMachineSet(name, cluster string, replicas int32) *clusterv1.MachineSet { }}, }, } + for _, opt := range opts { + opt(ms) + } + return ms +} + +func withMachineSetLabels(labels map[string]string) newMachineSetOption { + return func(m *clusterv1.MachineSet) { + m.Labels = labels + } +} + +func withMachineSetAnnotations(annotations map[string]string) newMachineSetOption { + return func(m *clusterv1.MachineSet) { + m.Annotations = annotations + } +} + +func withDeletionOrder(order clusterv1.MachineSetDeletionOrder) newMachineSetOption { + return func(m *clusterv1.MachineSet) { + m.Spec.Deletion.Order = order + } } func TestMachineSetReconcile_MachinesCreatedConditionFalseOnBadInfraRef(t *testing.T) { @@ -1445,7 +1471,7 @@ func TestMachineSetReconciler_syncMachines(t *testing.T) { Manager: machineSetManagerName, Operation: metav1.ManagedFieldsOperationApply, APIVersion: clusterv1.GroupVersion.String(), - FieldsV1: fmt.Sprintf("{\"f:metadata\":{\"f:annotations\":{\"f:dropped-annotation\":{},\"f:modified-annotation\":{},\"f:preserved-annotation\":{}},\"f:finalizers\":{\"v:\\\"machine.cluster.x-k8s.io\\\"\":{}},\"f:labels\":{\"f:cluster.x-k8s.io/deployment-name\":{},\"f:cluster.x-k8s.io/set-name\":{},\"f:dropped-label\":{},\"f:modified-label\":{},\"f:preserved-label\":{}},\"f:ownerReferences\":{\"k:{\\\"uid\\\":\\\"%s\\\"}\":{}}},\"f:spec\":{\"f:bootstrap\":{\"f:configRef\":{\"f:apiGroup\":{},\"f:kind\":{},\"f:name\":{}}},\"f:clusterName\":{},\"f:infrastructureRef\":{\"f:apiGroup\":{},\"f:kind\":{},\"f:name\":{}},\"f:version\":{}}}", ms.UID), + FieldsV1: 
fmt.Sprintf("{\"f:metadata\":{\"f:annotations\":{\"f:dropped-annotation\":{},\"f:modified-annotation\":{},\"f:preserved-annotation\":{}},\"f:finalizers\":{\"v:\\\"machine.cluster.x-k8s.io\\\"\":{}},\"f:labels\":{\"f:cluster.x-k8s.io/deployment-name\":{},\"f:cluster.x-k8s.io/set-name\":{},\"f:dropped-label\":{},\"f:modified-label\":{},\"f:preserved-label\":{}},\"f:ownerReferences\":{\"k:{\\\"uid\\\":\\\"%s\\\"}\":{}}},\"f:spec\":{\"f:bootstrap\":{\"f:configRef\":{\"f:apiGroup\":{},\"f:kind\":{},\"f:name\":{}}},\"f:clusterName\":{},\"f:infrastructureRef\":{\"f:apiGroup\":{},\"f:kind\":{},\"f:name\":{}}}}", ms.UID), }, { // manager owns the finalizer. Manager: "manager", @@ -2319,241 +2345,1156 @@ func TestMachineSetReconciler_reconcileUnhealthyMachines(t *testing.T) { } func TestMachineSetReconciler_syncReplicas(t *testing.T) { - t.Run("should hold off on creating new machines when preflight checks do not pass", func(t *testing.T) { - g := NewWithT(t) - - // An upgrading control plane should cause the preflight checks to not pass. - controlPlaneUpgrading := builder.ControlPlane("default", "test-cp"). - WithVersion("v1.26.2"). - WithStatusFields(map[string]interface{}{ - "status.version": "v1.25.2", - }). - Build() - cluster := &clusterv1.Cluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-cluster", - Namespace: "default", - }, - Spec: clusterv1.ClusterSpec{ - ControlPlaneRef: contract.ObjToContractVersionedObjectReference(controlPlaneUpgrading), - }, - } - machineSet := &clusterv1.MachineSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-machineset", - Namespace: "default", + tests := []struct { + name string + getAndAdoptMachinesForMachineSetSucceeded bool + machineSet *clusterv1.MachineSet + machines []*clusterv1.Machine + expectMachinesToAdd *int + expectMachinesToDelete *int + expectedTargetMSName *string + expectedMachinesToMove *int + }{ + { + name: "no op when getAndAdoptMachinesForMachineSetSucceeded is false", + getAndAdoptMachinesForMachineSetSucceeded: false, + machineSet: newMachineSet("ms1", "cluster1", 0), + machines: []*clusterv1.Machine{}, + expectMachinesToAdd: nil, + expectMachinesToDelete: nil, + expectedTargetMSName: nil, + expectedMachinesToMove: nil, + }, + { + name: "no op when all the expected machine already exists", + getAndAdoptMachinesForMachineSetSucceeded: true, + machineSet: newMachineSet("ms1", "cluster1", 2), + machines: []*clusterv1.Machine{ + fakeMachine("m1"), + fakeMachine("m2"), }, - Spec: clusterv1.MachineSetSpec{ - Replicas: ptr.To[int32](1), + expectMachinesToAdd: nil, + expectMachinesToDelete: nil, + expectedTargetMSName: nil, + expectedMachinesToMove: nil, + }, + { + name: "should create machines when too few exists", + getAndAdoptMachinesForMachineSetSucceeded: true, + machineSet: newMachineSet("ms1", "cluster1", 5), + machines: []*clusterv1.Machine{ + fakeMachine("m1"), + fakeMachine("m2"), }, - } - - fakeClient := fake.NewClientBuilder().WithObjects(controlPlaneUpgrading, builder.GenericControlPlaneCRD, machineSet).WithStatusSubresource(&clusterv1.MachineSet{}).Build() - r := &Reconciler{ - Client: fakeClient, - PreflightChecks: sets.Set[clusterv1.MachineSetPreflightCheck]{}.Insert(clusterv1.MachineSetPreflightCheckAll), - } - s := &scope{ - cluster: cluster, - machineSet: machineSet, - machines: []*clusterv1.Machine{}, + expectMachinesToAdd: ptr.To(3), + expectMachinesToDelete: nil, + expectedTargetMSName: nil, + expectedMachinesToMove: nil, + }, + { + name: "should delete machines when too many exists and MS is not instructed to move to 
another MachineSet", getAndAdoptMachinesForMachineSetSucceeded: true, - } - result, err := r.syncReplicas(ctx, s) - g.Expect(err).ToNot(HaveOccurred()) - g.Expect(result.IsZero()).To(BeFalse(), "syncReplicas should not return a 'zero' result") - - // Verify the proper condition is set on the MachineSet. - condition := clusterv1.MachinesCreatedV1Beta1Condition - g.Expect(v1beta1conditions.Has(machineSet, condition)). - To(BeTrue(), "MachineSet should have the %s condition set", condition) - machinesCreatedCondition := v1beta1conditions.Get(machineSet, condition) - g.Expect(machinesCreatedCondition.Status). - To(Equal(corev1.ConditionFalse), "%s condition status should be %s", condition, corev1.ConditionFalse) - g.Expect(machinesCreatedCondition.Reason). - To(Equal(clusterv1.PreflightCheckFailedV1Beta1Reason), "%s condition reason should be %s", condition, clusterv1.PreflightCheckFailedV1Beta1Reason) - - // Verify no new Machines are created. - machineList := &clusterv1.MachineList{} - g.Expect(r.Client.List(ctx, machineList)).To(Succeed()) - g.Expect(machineList.Items).To(BeEmpty(), "There should not be any machines") - }) + machineSet: newMachineSet("ms1", "cluster1", 2), + machines: []*clusterv1.Machine{ + fakeMachine("m1"), + fakeMachine("m2"), + fakeMachine("m3"), + fakeMachine("m4"), + }, + expectMachinesToAdd: nil, + expectMachinesToDelete: ptr.To(2), + expectedTargetMSName: nil, + expectedMachinesToMove: nil, + }, + { + name: "should move machines when too many exists and MS is instructed to move to another MachineSet", + getAndAdoptMachinesForMachineSetSucceeded: true, + machineSet: newMachineSet("ms1", "cluster1", 2, withMachineSetAnnotations(map[string]string{clusterv1.MachineSetMoveMachinesToMachineSetAnnotation: "ms2"})), + machines: []*clusterv1.Machine{ + fakeMachine("m1"), + fakeMachine("m2"), + fakeMachine("m3"), + fakeMachine("m4"), + }, + expectMachinesToAdd: nil, + expectMachinesToDelete: nil, + expectedTargetMSName: ptr.To("ms2"), + expectedMachinesToMove: ptr.To(2), + }, + { + name: "When the Machine set is receiving machines from other MachineSets, delete machines should delete only exceeding machines left after taking into account pending machines", + getAndAdoptMachinesForMachineSetSucceeded: true, + machineSet: newMachineSet("ms2", "cluster1", 2, withMachineSetAnnotations(map[string]string{clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation: "ms1"})), + machines: []*clusterv1.Machine{ + fakeMachine("m1"), + fakeMachine("m2"), + fakeMachine("m3"), + fakeMachine("m4", withMachineAnnotations(map[string]string{clusterv1.PendingAcknowledgeMoveAnnotation: ""})), + }, + expectMachinesToAdd: nil, + expectMachinesToDelete: ptr.To(1), + expectedTargetMSName: nil, + expectedMachinesToMove: nil, + }, + { + name: "When the Machine set is receiving machines from other MachineSets, should not delete if there are no exceeding machines left after taking into account pending machines", + getAndAdoptMachinesForMachineSetSucceeded: true, + machineSet: newMachineSet("ms2", "cluster1", 2, withMachineSetAnnotations(map[string]string{clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation: "ms1"})), + machines: []*clusterv1.Machine{ + fakeMachine("m1"), + fakeMachine("m2"), + fakeMachine("m3", withMachineAnnotations(map[string]string{clusterv1.PendingAcknowledgeMoveAnnotation: ""})), + }, + expectMachinesToAdd: nil, + expectMachinesToDelete: nil, + expectedTargetMSName: nil, + expectedMachinesToMove: nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t 
*testing.T) { + g := NewWithT(t) + + s := &scope{ + machineSet: tt.machineSet, + machines: tt.machines, + getAndAdoptMachinesForMachineSetSucceeded: tt.getAndAdoptMachinesForMachineSetSucceeded, + } + + r := &Reconciler{ + overrideCreateMachines: func(_ context.Context, _ *scope, machinesToAdd int) (ctrl.Result, error) { + g.Expect(tt.expectMachinesToAdd).ToNot(BeNil(), "unexpected call to create machines") + g.Expect(machinesToAdd).To(Equal(*tt.expectMachinesToAdd), "call to create machines does not have the expected machinesToAdd number") + return ctrl.Result{}, nil + }, + overrideMoveMachines: func(_ context.Context, _ *scope, targetMSName string, machinesToMove int) (ctrl.Result, error) { + g.Expect(tt.expectedMachinesToMove).ToNot(BeNil(), "unexpected call to move machines") + g.Expect(tt.expectedTargetMSName).ToNot(BeNil(), "unexpected call to move machines") + g.Expect(targetMSName).To(Equal(*tt.expectedTargetMSName), "call to move machines does not have the expected targetMS name") + g.Expect(machinesToMove).To(Equal(*tt.expectedMachinesToMove), "call to move machines does not have the expected machinesToMove number") + return ctrl.Result{}, nil + }, + overrideDeleteMachines: func(_ context.Context, _ *scope, machinesToDelete int) (ctrl.Result, error) { + g.Expect(tt.expectMachinesToDelete).ToNot(BeNil(), "unexpected call to delete machines") + g.Expect(machinesToDelete).To(Equal(*tt.expectMachinesToDelete), "call to delete machines does not have the expected machinesToDelete number") + return ctrl.Result{}, nil + }, + } + res, err := r.syncReplicas(ctx, s) + g.Expect(err).ToNot(HaveOccurred(), "unexpected error when syncing replicas") + g.Expect(res.IsZero()).To(BeTrue(), "unexpected non zero result when syncing replicas") + }) + } } -func TestMachineSetReconciler_syncReplicas_WithErrors(t *testing.T) { - t.Run("should hold off on sync replicas when create Infrastructure of machine failed ", func(t *testing.T) { - g := NewWithT(t) - scheme := runtime.NewScheme() - g.Expect(apiextensionsv1.AddToScheme(scheme)).To(Succeed()) - g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed()) - - fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(builder.GenericBootstrapConfigTemplateCRD, builder.GenericInfrastructureMachineTemplateCRD).WithInterceptorFuncs(interceptor.Funcs{ - Apply: func(ctx context.Context, c client.WithWatch, obj runtime.ApplyConfiguration, opts ...client.ApplyOption) error { - // simulate scenarios where infra object creation fails - clientObject, ok := obj.(client.Object) - if !ok { - return errors.Errorf("error during object creation: unexpected ApplyConfiguration") - } - if clientObject.GetObjectKind().GroupVersionKind().Kind == "GenericInfrastructureMachine" { - return fmt.Errorf("inject error for create machineInfrastructure") - } - return c.Apply(ctx, obj, opts...) - }, - }).Build() +func TestMachineSetReconciler_createMachines_preflightChecks(t *testing.T) { + // This test is not included in the table test for createMachines because it requires a specific setup. + g := NewWithT(t) - r := &Reconciler{ - Client: fakeClient, - // Note: This field is only used for unit tests that use fake client because the fake client does not properly set resourceVersion - // on BootstrapConfig/InfraMachine after ssa.Patch and then ssa.RemoveManagedFieldsForLabelsAndAnnotations would fail. 
- disableRemoveManagedFieldsForLabelsAndAnnotations: true,
- }
- testCluster := &clusterv1.Cluster{
- ObjectMeta: metav1.ObjectMeta{
- Name: "test-cluster",
- Namespace: "default",
- },
- }

+ // An upgrading control plane should cause the preflight checks to not pass.
+ controlPlaneUpgrading := builder.ControlPlane("default", "test-cp").
+ WithVersion("v1.26.2").
+ WithStatusFields(map[string]interface{}{
+ "status.version": "v1.25.2",
+ }).
+ Build()
+ cluster := &clusterv1.Cluster{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-cluster",
+ Namespace: "default",
+ },
+ Spec: clusterv1.ClusterSpec{
+ ControlPlaneRef: contract.ObjToContractVersionedObjectReference(controlPlaneUpgrading),
+ },
+ }
+ machineSet := &clusterv1.MachineSet{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-machineset",
+ Namespace: "default",
+ },
+ Spec: clusterv1.MachineSetSpec{
+ Replicas: ptr.To[int32](1),
+ },
+ }

- duration10m := ptr.To(int32(10 * 60))
- machineSet := &clusterv1.MachineSet{
- ObjectMeta: metav1.ObjectMeta{
- Name: "machineset1",
- Namespace: metav1.NamespaceDefault,
- Finalizers: []string{"block-deletion"},
- },
- Spec: clusterv1.MachineSetSpec{
- Replicas: ptr.To[int32](1),
- ClusterName: "test-cluster",
- Template: clusterv1.MachineTemplateSpec{
- Spec: clusterv1.MachineSpec{
- ClusterName: testCluster.Name,
- Version: "v1.14.2",
- Bootstrap: clusterv1.Bootstrap{
- ConfigRef: clusterv1.ContractVersionedObjectReference{
- APIGroup: clusterv1.GroupVersionBootstrap.Group,
- Kind: builder.GenericBootstrapConfigTemplateKind,
- Name: "ms-template",
- },
- },
- InfrastructureRef: clusterv1.ContractVersionedObjectReference{
- APIGroup: clusterv1.GroupVersionInfrastructure.Group,
- Kind: builder.GenericInfrastructureMachineTemplateKind,

+ fakeClient := fake.NewClientBuilder().WithObjects(controlPlaneUpgrading, builder.GenericControlPlaneCRD, machineSet).WithStatusSubresource(&clusterv1.MachineSet{}).Build()
+ r := &Reconciler{
+ Client: fakeClient,
+ PreflightChecks: sets.Set[clusterv1.MachineSetPreflightCheck]{}.Insert(clusterv1.MachineSetPreflightCheckAll),
+ }
+ s := &scope{
+ cluster: cluster,
+ machineSet: machineSet,
+ machines: []*clusterv1.Machine{},
+ getAndAdoptMachinesForMachineSetSucceeded: true,
+ }
+ result, err := r.createMachines(ctx, s, 3)
+ g.Expect(err).ToNot(HaveOccurred())
+ g.Expect(result.IsZero()).To(BeFalse(), "createMachines should not return a 'zero' result")
+
+ // Verify the proper condition is set on the MachineSet.
+ condition := clusterv1.MachinesCreatedV1Beta1Condition
+ g.Expect(v1beta1conditions.Has(machineSet, condition)).
+ To(BeTrue(), "MachineSet should have the %s condition set", condition)
+ machinesCreatedCondition := v1beta1conditions.Get(machineSet, condition)
+ g.Expect(machinesCreatedCondition.Status).
+ To(Equal(corev1.ConditionFalse), "%s condition status should be %s", condition, corev1.ConditionFalse)
+ g.Expect(machinesCreatedCondition.Reason).
+ To(Equal(clusterv1.PreflightCheckFailedV1Beta1Reason), "%s condition reason should be %s", condition, clusterv1.PreflightCheckFailedV1Beta1Reason)
+
+ // Verify no new Machines are created.
+ machineList := &clusterv1.MachineList{} + g.Expect(r.Client.List(ctx, machineList)).To(Succeed()) + g.Expect(machineList.Items).To(BeEmpty(), "There should not be any machines") +} + +func TestMachineSetReconciler_createMachines(t *testing.T) { + machineSet := &clusterv1.MachineSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "machineset1", + Namespace: metav1.NamespaceDefault, + }, + Spec: clusterv1.MachineSetSpec{ + Replicas: ptr.To[int32](1), + ClusterName: "test-cluster", + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + ClusterName: "test-cluster", + Version: "v1.14.2", + Bootstrap: clusterv1.Bootstrap{ + ConfigRef: clusterv1.ContractVersionedObjectReference{ + APIGroup: clusterv1.GroupVersionBootstrap.Group, + Kind: builder.GenericBootstrapConfigTemplateKind, Name: "ms-template", }, - Deletion: clusterv1.MachineDeletionSpec{ - NodeDrainTimeoutSeconds: duration10m, - NodeDeletionTimeoutSeconds: duration10m, - NodeVolumeDetachTimeoutSeconds: duration10m, - }, - MinReadySeconds: ptr.To[int32](10), + }, + InfrastructureRef: clusterv1.ContractVersionedObjectReference{ + APIGroup: clusterv1.GroupVersionInfrastructure.Group, + Kind: builder.GenericInfrastructureMachineTemplateKind, + Name: "ms-template", }, }, }, - } + }, + } - // Create bootstrap template resource. - bootstrapResource := map[string]interface{}{ - "kind": "GenericBootstrapConfig", - "apiVersion": clusterv1.GroupVersionBootstrap.String(), - "metadata": map[string]interface{}{ - "annotations": map[string]interface{}{ - "precedence": "GenericBootstrapConfig", + // Create bootstrap template resource. + bootstrapResource := map[string]interface{}{ + "kind": builder.GenericBootstrapConfigKind, + "apiVersion": clusterv1.GroupVersionBootstrap.String(), + "metadata": map[string]interface{}{ + "annotations": map[string]interface{}{ + "precedence": "GenericBootstrapConfig", + }, + }, + } + + bootstrapTmpl := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "spec": map[string]interface{}{ + "template": bootstrapResource, + }, + }, + } + bootstrapTmpl.SetKind(builder.GenericBootstrapConfigTemplateKind) + bootstrapTmpl.SetAPIVersion(clusterv1.GroupVersionBootstrap.String()) + bootstrapTmpl.SetName("ms-template") + bootstrapTmpl.SetNamespace(metav1.NamespaceDefault) + + // Create infrastructure template resource. 
+ infraResource := map[string]interface{}{ + "kind": builder.GenericInfrastructureMachineKind, + "apiVersion": clusterv1.GroupVersionInfrastructure.String(), + "metadata": map[string]interface{}{ + "annotations": map[string]interface{}{ + "precedence": "GenericInfrastructureMachineTemplate", + }, + }, + "spec": map[string]interface{}{ + "size": "3xlarge", + }, + } + infraTmpl := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "spec": map[string]interface{}{ + "template": infraResource, + }, + }, + } + infraTmpl.SetKind(builder.GenericInfrastructureMachineTemplateKind) + infraTmpl.SetAPIVersion(clusterv1.GroupVersionInfrastructure.String()) + infraTmpl.SetName("ms-template") + infraTmpl.SetNamespace(metav1.NamespaceDefault) + + tests := []struct { + name string + machinesToAdd int + interceptorFuncs func(i *int) interceptor.Funcs + wantMachines int + wantErr bool + wantErrorMessage string + }{ + { + name: "should create machines", + machinesToAdd: 4, + interceptorFuncs: func(_ *int) interceptor.Funcs { return interceptor.Funcs{} }, + wantMachines: 4, + wantErr: false, + }, + { + name: "should stop creating machines when there are failures and rollback partial changes", + machinesToAdd: 4, + interceptorFuncs: func(i *int) interceptor.Funcs { + return interceptor.Funcs{ + Apply: func(ctx context.Context, c client.WithWatch, obj runtime.ApplyConfiguration, opts ...client.ApplyOption) error { + clientObject, ok := obj.(client.Object) + if !ok { + return errors.Errorf("error during object creation: unexpected ApplyConfiguration") + } + if clientObject.GetObjectKind().GroupVersionKind().Kind == "Machine" { + *i++ + if *i == 2 { // Note: fail for the second machine only (there should not be call for following machines) + return fmt.Errorf("inject error for create Machine") + } + } + return c.Apply(ctx, obj, opts...) + }, + } + }, + wantMachines: 1, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + scheme := runtime.NewScheme() + g.Expect(apiextensionsv1.AddToScheme(scheme)).To(Succeed()) + g.Expect(clusterv1.AddToScheme(scheme)).To(Succeed()) + + i := 0 + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects( + builder.GenericBootstrapConfigTemplateCRD, + builder.GenericInfrastructureMachineTemplateCRD, + machineSet, + bootstrapTmpl, + infraTmpl, + ).WithInterceptorFuncs(tt.interceptorFuncs(&i)).Build() + + r := &Reconciler{ + Client: fakeClient, + recorder: record.NewFakeRecorder(32), + // Note: This field is only used for unit tests that use fake client because the fake client does not properly set resourceVersion + // on BootstrapConfig/InfraMachine after ssa.Patch and then ssa.RemoveManagedFieldsForLabelsAndAnnotations would fail. + disableRemoveManagedFieldsForLabelsAndAnnotations: true, + } + s := &scope{ + machineSet: machineSet, + machines: []*clusterv1.Machine{}, + getAndAdoptMachinesForMachineSetSucceeded: true, + } + res, err := r.createMachines(ctx, s, tt.machinesToAdd) + if tt.wantErr { + g.Expect(err).To(HaveOccurred(), "expected error when creating machines, got none") + } else { + g.Expect(err).ToNot(HaveOccurred(), "unexpected error when creating machines") + } + g.Expect(res.IsZero()).To(BeTrue(), "unexpected non zero result when creating machines") + + // Verify new Machines are created. 
+ machineList := &clusterv1.MachineList{}
+ g.Expect(r.Client.List(ctx, machineList)).To(Succeed())
+ g.Expect(machineList.Items).To(HaveLen(tt.wantMachines), "Unexpected machine")
+
+ for _, machine := range machineList.Items {
+ // Verify bootstrap object created
+ bootstrap := &unstructured.Unstructured{}
+ bootstrap.SetKind(machine.Spec.Bootstrap.ConfigRef.Kind)
+ bootstrap.SetAPIVersion(clusterv1.GroupVersionBootstrap.String())
+ bootstrap.SetNamespace(machine.Namespace)
+ bootstrap.SetName(machine.Spec.Bootstrap.ConfigRef.Name)
+ g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(bootstrap), bootstrap)).To(Succeed(), "failed to get bootstrap object")
+
+ // Verify infra object created
+ infra := &unstructured.Unstructured{}
+ infra.SetKind(machine.Spec.InfrastructureRef.Kind)
+ infra.SetAPIVersion(clusterv1.GroupVersionInfrastructure.String())
+ infra.SetNamespace(machine.Namespace)
+ infra.SetName(machine.Spec.InfrastructureRef.Name)
+ g.Expect(r.Client.Get(ctx, client.ObjectKeyFromObject(infra), infra)).To(Succeed(), "failed to get infra object")
+ }
+
+ // Verify no additional bootstrap objects created
+ bootstrapList := &unstructured.UnstructuredList{}
+ bootstrapList.SetGroupVersionKind(schema.GroupVersionKind{
+ Group: bootstrapTmpl.GetObjectKind().GroupVersionKind().Group,
+ Version: bootstrapTmpl.GetObjectKind().GroupVersionKind().Version,
+ Kind: strings.TrimSuffix(bootstrapTmpl.GetObjectKind().GroupVersionKind().Kind, clusterv1.TemplateSuffix),
+ })
+ g.Expect(r.Client.List(ctx, bootstrapList)).To(Succeed())
+ g.Expect(bootstrapList.Items).To(HaveLen(tt.wantMachines), "Unexpected bootstrap objects")
+
+ // Verify no additional infra objects created
+ infraList := &unstructured.UnstructuredList{}
+ infraList.SetGroupVersionKind(schema.GroupVersionKind{
+ Group: infraTmpl.GetObjectKind().GroupVersionKind().Group,
+ Version: infraTmpl.GetObjectKind().GroupVersionKind().Version,
+ Kind: strings.TrimSuffix(infraTmpl.GetObjectKind().GroupVersionKind().Kind, clusterv1.TemplateSuffix),
+ })
+ g.Expect(r.Client.List(ctx, infraList)).To(Succeed())
+ g.Expect(infraList.Items).To(HaveLen(tt.wantMachines), "Unexpected infra objects")
+ })
+ }
+}
+
+func TestMachineSetReconciler_deleteMachines(t *testing.T) {
+ tests := []struct {
+ name string
+ ms *clusterv1.MachineSet
+ machines []*clusterv1.Machine
+ machinesToDelete int
+ interceptorFuncs interceptor.Funcs
+ wantMachines []string
+ wantErr bool
+ wantErrorMessage string
+ }{
+ {
+ name: "should delete machines using the given deletion order",
+ ms: newMachineSet("ms1", "cluster1", 2, withDeletionOrder(clusterv1.NewestMachineSetDeletionOrder)),
+ machines: []*clusterv1.Machine{
+ fakeMachine("m1", withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-4*time.Minute)), withHealthyNode()), // oldest
+ fakeMachine("m2", withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-3*time.Minute)), withHealthyNode()),
+ fakeMachine("m3", withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-2*time.Minute)), withHealthyNode()),
+ fakeMachine("m4", withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-1*time.Minute)), withHealthyNode()), // newest
+ },
+ machinesToDelete: 2,
+ interceptorFuncs: interceptor.Funcs{},
+ wantMachines: []string{"m1", "m2"}, // m3 and m4 deleted because they are newest and deletion order is NewestMachineSetDeletionOrder
+ wantErr: false,
+ },
+ {
+ name: "should not delete more machines when enough machines are already deleting",
+ ms: newMachineSet("ms1", "cluster1", 2, 
withDeletionOrder(clusterv1.NewestMachineSetDeletionOrder)), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-4*time.Minute)), withHealthyNode()), // oldest + fakeMachine("m2", withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-3*time.Minute)), withHealthyNode(), withDeletionTimestamp()), + fakeMachine("m3", withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-2*time.Minute)), withHealthyNode()), + fakeMachine("m4", withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-1*time.Minute)), withHealthyNode(), withDeletionTimestamp()), // newest + }, + machinesToDelete: 2, + interceptorFuncs: interceptor.Funcs{}, + wantMachines: []string{"m1", "m3"}, // m2 and m4 already deleted, no additional machines deleted + wantErr: false, + }, + { + name: "should delete machines when not enough machines are already deleting", + ms: newMachineSet("ms1", "cluster1", 2, withDeletionOrder(clusterv1.NewestMachineSetDeletionOrder)), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-4*time.Minute)), withHealthyNode()), // oldest + fakeMachine("m2", withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-3*time.Minute)), withHealthyNode()), + fakeMachine("m3", withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-2*time.Minute)), withHealthyNode()), + fakeMachine("m4", withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-1*time.Minute)), withHealthyNode(), withDeletionTimestamp()), // newest + }, + machinesToDelete: 2, + interceptorFuncs: interceptor.Funcs{}, + wantMachines: []string{"m1", "m2"}, // m3 deleted, m4 already deleted + wantErr: false, + }, + { + name: "should keep deleting machines when one deletion fails", + ms: newMachineSet("ms1", "cluster1", 2), // use default deletion order, oldest machine deleted first + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-4*time.Minute)), withHealthyNode()), // oldest + fakeMachine("m2", withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-3*time.Minute)), withHealthyNode()), + fakeMachine("m3", withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-2*time.Minute)), withHealthyNode()), + fakeMachine("m4", withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-1*time.Minute)), withHealthyNode()), // newest + }, + machinesToDelete: 2, + interceptorFuncs: interceptor.Funcs{ + Delete: func(ctx context.Context, client client.WithWatch, obj client.Object, opts ...client.DeleteOption) error { + if obj.GetName() == "m1" { + return fmt.Errorf("error when deleting m1") + } + return client.Delete(ctx, obj, opts...) 
},
 },
+ wantMachines: []string{"m1", "m3", "m4"}, // m1 and m2 deleted, but m1 failed to delete
+ wantErr: true,
+ wantErrorMessage: "error when deleting m1",
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ g := NewWithT(t)
+
+ objs := []client.Object{tt.ms}
+ for _, m := range tt.machines {
+ objs = append(objs, m)
+ }
+ fakeClient := fake.NewClientBuilder().WithObjects(objs...).WithInterceptorFuncs(tt.interceptorFuncs).Build()
+ r := &Reconciler{
+ Client: fakeClient,
+ recorder: record.NewFakeRecorder(32),
+ }
+ s := &scope{
+ machineSet: tt.ms,
+ machines: tt.machines,
+ }
+ res, err := r.deleteMachines(ctx, s, tt.machinesToDelete)
+ if tt.wantErr {
+ g.Expect(err).To(HaveOccurred(), "expected error when deleting machines, got none")
+ g.Expect(err.Error()).To(ContainSubstring(tt.wantErrorMessage))
+ } else {
+ g.Expect(err).ToNot(HaveOccurred(), "unexpected error when deleting machines")
+ }
+ g.Expect(res.IsZero()).To(BeTrue(), "unexpected non zero result when deleting machines")
+
+ machines := &clusterv1.MachineList{}
+ g.Expect(fakeClient.List(ctx, machines)).ToNot(HaveOccurred(), "unexpected error when listing machines")
+
+ machineNames := []string{}
+ for i := range machines.Items {
+ if machines.Items[i].DeletionTimestamp.IsZero() {
+ machineNames = append(machineNames, machines.Items[i].Name)
+ }
+ }
+ g.Expect(machineNames).To(ConsistOf(tt.wantMachines), "Unexpected machine")
+ })
+ }
+}
+
+func TestMachineSetReconciler_startMoveMachines(t *testing.T) {
+ machinesByMachineSet := func(machines *clusterv1.MachineList, ms *clusterv1.MachineSet) []string {
+ msMachines := []string{}
+ for i := range machines.Items {
+ // Note: Checking both ownerReferences and unique label
+ if len(machines.Items[i].OwnerReferences) == 1 &&
+ machines.Items[i].OwnerReferences[0].Kind == "MachineSet" &&
+ machines.Items[i].OwnerReferences[0].Name == ms.Name &&
+ machines.Items[i].Labels[clusterv1.MachineDeploymentUniqueLabel] == ms.Labels[clusterv1.MachineDeploymentUniqueLabel] {
+ msMachines = append(msMachines, machines.Items[i].Name)
+ }
 }
+ sort.Strings(msMachines)
+ return msMachines
+ }

+ tests := []struct {
+ name string
+ ms *clusterv1.MachineSet
+ targetMS *clusterv1.MachineSet
+ machines []*clusterv1.Machine
+ machinesToMove int
+ interceptorFuncs interceptor.Funcs
+ wantMachinesNotMoved []string
+ wantMovedMachines []string
+ wantErr bool
+ wantErrorMessage string
+ }{
+ {
+ name: "should fail when target MS cannot be found",
+ ms: newMachineSet("ms1", "cluster1", 2,
+ withMachineSetLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}),
+ withMachineSetAnnotations(map[string]string{clusterv1.MachineSetMoveMachinesToMachineSetAnnotation: "ms2"}),
+ ),
+ targetMS: newMachineSet("do-not-exist", "cluster1", 2),
+ machines: []*clusterv1.Machine{
+ fakeMachine("m1", withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withOwnerMachineSet("ms1"), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-4*time.Minute)), withHealthyNode()), // oldest
+ fakeMachine("m2", withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withOwnerMachineSet("ms1"), withCreationTimestamp(time.Now().Add(-1*time.Minute)), withHealthyNode()),
+ },
+ machinesToMove: 1,
+ interceptorFuncs: interceptor.Funcs{},
+ wantMachinesNotMoved: []string{"m1", "m2"}, 
+ wantMovedMachines: nil,
+ wantErr: true,
+ wantErrorMessage: "failed to get MachineSet",
+ },
+ {
+ name: "should fail when current and target MS disagree on the move operation",
+ ms: newMachineSet("ms1", "cluster1", 2,
+ withMachineSetLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}),
+ withMachineSetAnnotations(map[string]string{clusterv1.MachineSetMoveMachinesToMachineSetAnnotation: "ms2"}),
+ ),
+ targetMS: newMachineSet("ms2", "cluster1", 2,
+ withMachineSetLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "456"}),
+ withMachineSetAnnotations(map[string]string{clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation: "ms3,ms4"}),
+ ),
+ machines: []*clusterv1.Machine{
+ fakeMachine("m1", withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withOwnerMachineSet("ms1"), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-4*time.Minute)), withHealthyNode()), // oldest
+ fakeMachine("m2", withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withOwnerMachineSet("ms1"), withCreationTimestamp(time.Now().Add(-1*time.Minute)), withHealthyNode()),
+ },
+ machinesToMove: 1,
+ interceptorFuncs: interceptor.Funcs{},
+ wantMachinesNotMoved: []string{"m1", "m2"},
+ wantMovedMachines: nil,
+ wantErr: true,
+ wantErrorMessage: "MachineSet ms1 is set to move replicas to ms2, but ms2 only accepts Machines from ms3,ms4",
+ },
+ {
+ name: "should fail when target MS doesn't have a unique label",
+ ms: newMachineSet("ms1", "cluster1", 2,
+ withMachineSetLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}),
+ withMachineSetAnnotations(map[string]string{clusterv1.MachineSetMoveMachinesToMachineSetAnnotation: "ms2"}),
+ ),
+ targetMS: newMachineSet("ms2", "cluster1", 2,
+ // unique label missing
+ withMachineSetAnnotations(map[string]string{clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation: "ms1,ms3"}),
+ ),
+ machines: []*clusterv1.Machine{
+ fakeMachine("m1", withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withOwnerMachineSet("ms1"), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-4*time.Minute)), withHealthyNode()), // oldest
+ fakeMachine("m2", withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withOwnerMachineSet("ms1"), withCreationTimestamp(time.Now().Add(-1*time.Minute)), withHealthyNode()),
+ },
+ machinesToMove: 1,
+ interceptorFuncs: interceptor.Funcs{},
+ wantMachinesNotMoved: []string{"m1", "m2"},
+ wantMovedMachines: nil,
+ wantErr: true,
+ wantErrorMessage: fmt.Sprintf("MachineSet ms2 does not have the %s label", clusterv1.MachineDeploymentUniqueLabel),
+ },
+ {
+ name: "should move machines",
+ ms: newMachineSet("ms1", "cluster1", 2,
+ withDeletionOrder(clusterv1.NewestMachineSetDeletionOrder),
+ withMachineSetLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}),
+ withMachineSetAnnotations(map[string]string{clusterv1.MachineSetMoveMachinesToMachineSetAnnotation: "ms2"}),
+ ),
+ targetMS: newMachineSet("ms2", "cluster1", 2,
+ withMachineSetLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "456"}),
+ withMachineSetAnnotations(map[string]string{clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation: "ms1,ms3"}),
+ ),
+ machines: []*clusterv1.Machine{
+ fakeMachine("m1", withOwnerMachineSet("ms1"), withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withMachineFinalizer(), 
withCreationTimestamp(time.Now().Add(-4*time.Minute)), withHealthyNode()), // oldest + fakeMachine("m2", withOwnerMachineSet("ms1"), withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-3*time.Minute)), withHealthyNode()), + fakeMachine("m3", withOwnerMachineSet("ms1"), withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-2*time.Minute)), withHealthyNode()), + fakeMachine("m4", withOwnerMachineSet("ms1"), withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-1*time.Minute)), withHealthyNode()), // newest + }, + machinesToMove: 2, + interceptorFuncs: interceptor.Funcs{}, + wantMachinesNotMoved: []string{"m1", "m2"}, + wantMovedMachines: []string{"m3", "m4"}, // newest machines moved first with NewestMachineSetDeletionOrder + wantErr: false, + }, + { + name: "should not move deleting machines, decrease the move count", + ms: newMachineSet("ms1", "cluster1", 2, + withDeletionOrder(clusterv1.NewestMachineSetDeletionOrder), + withMachineSetLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), + withMachineSetAnnotations(map[string]string{clusterv1.MachineSetMoveMachinesToMachineSetAnnotation: "ms2"}), + ), + targetMS: newMachineSet("ms2", "cluster1", 2, + withMachineSetLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "456"}), + withMachineSetAnnotations(map[string]string{clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation: "ms1,ms3"}), + ), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withOwnerMachineSet("ms1"), withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-4*time.Minute)), withHealthyNode()), // oldest + fakeMachine("m2", withOwnerMachineSet("ms1"), withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-3*time.Minute)), withHealthyNode()), + fakeMachine("m3", withOwnerMachineSet("ms1"), withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-2*time.Minute)), withHealthyNode()), + fakeMachine("m4", withOwnerMachineSet("ms1"), withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-1*time.Minute)), withHealthyNode(), withDeletionTimestamp()), // newest + }, + machinesToMove: 2, + interceptorFuncs: interceptor.Funcs{}, + wantMachinesNotMoved: []string{"m1", "m2", "m4"}, + wantMovedMachines: []string{"m3"}, // newest machines moved first with NewestMachineSetDeletionOrder, but m4 is deleting so don't touch it + wantErr: false, + }, + { + name: "should not move more machines that are already updating in place, pick another machine instead", + ms: newMachineSet("ms1", "cluster1", 2, + // use default deletion order, oldest machine move first + withMachineSetLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), + withMachineSetAnnotations(map[string]string{clusterv1.MachineSetMoveMachinesToMachineSetAnnotation: "ms2"}), + ), + targetMS: newMachineSet("ms2", "cluster1", 2, + withMachineSetLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "456"}), + 
withMachineSetAnnotations(map[string]string{clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation: "ms1,ms3"}), + ), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withOwnerMachineSet("ms1"), withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-4*time.Minute)), withHealthyNode()), // oldest + fakeMachine("m2", withOwnerMachineSet("ms1"), withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-3*time.Minute)), withHealthyNode(), withMachineAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: ""})), + fakeMachine("m3", withOwnerMachineSet("ms1"), withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-2*time.Minute)), withHealthyNode(), withMachineAnnotations(map[string]string{runtimev1.PendingHooksAnnotation: "UpdateMachine"})), + fakeMachine("m4", withOwnerMachineSet("ms1"), withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-1*time.Minute)), withHealthyNode()), // newest + }, + machinesToMove: 2, + interceptorFuncs: interceptor.Funcs{}, + wantMachinesNotMoved: []string{"m2", "m3"}, + wantMovedMachines: []string{"m1", "m4"}, // oldest machines moved first with the default deletion order, but m2 and m3 are updating in place so don't touch them but pick other machines instead + wantErr: false, + }, + { + name: "should keep moving machines when one move fails", + ms: newMachineSet("ms1", "cluster1", 2, + // use default deletion order, oldest machine move first + withMachineSetLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), + withMachineSetAnnotations(map[string]string{clusterv1.MachineSetMoveMachinesToMachineSetAnnotation: "ms2"}), + ), + targetMS: newMachineSet("ms2", "cluster1", 2, + withMachineSetLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "456"}), + withMachineSetAnnotations(map[string]string{clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation: "ms1,ms3"}), + ), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withOwnerMachineSet("ms1"), withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-4*time.Minute)), withHealthyNode()), // oldest + fakeMachine("m2", withOwnerMachineSet("ms1"), withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-3*time.Minute)), withHealthyNode()), + fakeMachine("m3", withOwnerMachineSet("ms1"), withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-2*time.Minute)), withHealthyNode()), + fakeMachine("m4", withOwnerMachineSet("ms1"), withMachineLabels(map[string]string{clusterv1.MachineDeploymentUniqueLabel: "123"}), withMachineFinalizer(), withCreationTimestamp(time.Now().Add(-1*time.Minute)), withHealthyNode()), // newest + }, + machinesToMove: 2, + interceptorFuncs: interceptor.Funcs{ + Patch: func(ctx context.Context, client client.WithWatch, obj client.Object, patch client.Patch, opts ...client.PatchOption) error { + if obj.GetName() == "m1" { + return fmt.Errorf("error when moving m1") + } + return client.Patch(ctx, obj, patch, 
opts...) }, }, + wantMachinesNotMoved: []string{"m1", "m3", "m4"}, + wantMovedMachines: []string{"m2"}, // oldest machines moved first with the default deletion order, but m1 failed to move + wantErr: true, + wantErrorMessage: "error when moving m1", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + objs := []client.Object{tt.ms} + if tt.targetMS.Name != "do-not-exist" { + objs = append(objs, tt.targetMS) + } + for _, m := range tt.machines { + objs = append(objs, m) + } + fakeClient := fake.NewClientBuilder().WithObjects(objs...).WithInterceptorFuncs(tt.interceptorFuncs).Build() + r := &Reconciler{ + Client: fakeClient, + recorder: record.NewFakeRecorder(32), + } + s := &scope{ + machineSet: tt.ms, + machines: tt.machines, + } + res, err := r.startMoveMachines(ctx, s, tt.targetMS.Name, tt.machinesToMove) + if tt.wantErr { + g.Expect(err).To(HaveOccurred(), "expected error when moving machines, got none") + g.Expect(err.Error()).To(ContainSubstring(tt.wantErrorMessage)) + } else { + g.Expect(err).ToNot(HaveOccurred(), "unexpected error when moving machines") + } + g.Expect(res.IsZero()).To(BeTrue(), "unexpected non zero result when moving machines") + + machines := &clusterv1.MachineList{} + g.Expect(fakeClient.List(ctx, machines)).ToNot(HaveOccurred(), "unexpected error when listing machines") + g.Expect(machinesByMachineSet(machines, tt.ms)).To(ConsistOf(tt.wantMachinesNotMoved)) + + movedMachines := machinesByMachineSet(machines, tt.targetMS) + g.Expect(movedMachines).To(ConsistOf(tt.wantMovedMachines)) + for _, name := range movedMachines { + for _, m := range machines.Items { + if m.Name == name { + g.Expect(m.Annotations).To(HaveKeyWithValue(clusterv1.UpdateInProgressAnnotation, "")) + g.Expect(m.Annotations).To(HaveKeyWithValue(clusterv1.PendingAcknowledgeMoveAnnotation, "")) + } + } + } + }) + } +} + +func TestMachineSetReconciler_triggerInPlaceUpdate(t *testing.T) { + machinesNotInPlaceUpdating := func(machines *clusterv1.MachineList) []string { + msMachines := []string{} + for i := range machines.Items { + if machines.Items[i].Annotations[runtimev1.PendingHooksAnnotation] != "UpdateMachine" { + msMachines = append(msMachines, machines.Items[i].Name) + } } - bootstrapTmpl.SetKind("GenericBootstrapConfigTemplate") - bootstrapTmpl.SetAPIVersion(clusterv1.GroupVersionBootstrap.String()) - bootstrapTmpl.SetName("ms-template") - bootstrapTmpl.SetNamespace(metav1.NamespaceDefault) - g.Expect(r.Client.Create(context.TODO(), bootstrapTmpl)).To(Succeed()) + sort.Strings(msMachines) + return msMachines + } + machinesInPlaceUpdating := func(machines *clusterv1.MachineList) []string { + msMachines := []string{} + for i := range machines.Items { + if machines.Items[i].Annotations[runtimev1.PendingHooksAnnotation] == "UpdateMachine" { + msMachines = append(msMachines, machines.Items[i].Name) + } + } + sort.Strings(msMachines) + return msMachines + } - // Create infrastructure template resource. - infraResource := map[string]interface{}{ - "kind": "GenericInfrastructureMachine", - "apiVersion": clusterv1.GroupVersionInfrastructure.String(), - "metadata": map[string]interface{}{ - "annotations": map[string]interface{}{ - "precedence": "GenericInfrastructureMachineTemplate", - }, + // Create bootstrap template resource. 
+ bootstrapResource := map[string]interface{}{ + "kind": builder.GenericBootstrapConfigKind, + "apiVersion": clusterv1.GroupVersionBootstrap.String(), + "spec": map[string]interface{}{ + "foo": "bar", + }, + } + + bootstrapTmpl := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "spec": map[string]interface{}{ + "template": bootstrapResource, }, + }, + } + bootstrapTmpl.SetKind(builder.GenericBootstrapConfigTemplateKind) + bootstrapTmpl.SetAPIVersion(clusterv1.GroupVersionBootstrap.String()) + bootstrapTmpl.SetNamespace(metav1.NamespaceDefault) + bootstrapTmpl.SetName("ms-bootstrap-template") + + bootstrapObj := &unstructured.Unstructured{ + Object: bootstrapResource["spec"].(map[string]interface{}), + } + bootstrapObj.SetKind(builder.GenericBootstrapConfigKind) + bootstrapObj.SetAPIVersion(clusterv1.GroupVersionBootstrap.String()) + bootstrapObj.SetNamespace(metav1.NamespaceDefault) + bootstrapObj.SetName("bootstrap") + + // Create infrastructure template resource. + infraResource := map[string]interface{}{ + "kind": builder.GenericInfrastructureMachineKind, + "apiVersion": clusterv1.GroupVersionInfrastructure.String(), + "spec": map[string]interface{}{ + "size": "3xlarge", + }, + } + infraTmpl := &unstructured.Unstructured{ + Object: map[string]interface{}{ "spec": map[string]interface{}{ - "size": "3xlarge", + "template": infraResource, }, - } - infraTmpl := &unstructured.Unstructured{ - Object: map[string]interface{}{ - "spec": map[string]interface{}{ - "template": infraResource, + }, + } + infraTmpl.SetKind(builder.GenericInfrastructureMachineTemplateKind) + infraTmpl.SetAPIVersion(clusterv1.GroupVersionInfrastructure.String()) + infraTmpl.SetNamespace(metav1.NamespaceDefault) + infraTmpl.SetName("ms-infra-template") + + infraObj := &unstructured.Unstructured{ + Object: infraResource["spec"].(map[string]interface{}), + } + infraObj.SetKind(builder.GenericInfrastructureMachineKind) + infraObj.SetAPIVersion(clusterv1.GroupVersionInfrastructure.String()) + infraObj.SetNamespace(metav1.NamespaceDefault) + infraObj.SetName("infra") + + tests := []struct { + name string + ms *clusterv1.MachineSet + machines []*clusterv1.Machine + noBootStrapConfig bool + interceptorFuncs interceptor.Funcs + wantMachinesNotInPlaceUpdating []string + wantMachinesInPlaceUpdating []string + wantErr bool + wantErrorMessage string + }{ + { + name: "No op when machines did not start move / do not have the UpdateInProgressAnnotation", + ms: newMachineSet("ms1", "cluster1", 2), + machines: []*clusterv1.Machine{ + fakeMachine("m1"), + fakeMachine("m2"), + }, + interceptorFuncs: interceptor.Funcs{}, + wantMachinesNotInPlaceUpdating: []string{"m1", "m2"}, + wantMachinesInPlaceUpdating: nil, + wantErr: false, + }, + { + name: "No op when in place upgrade has been already triggered", + ms: newMachineSet("ms1", "cluster1", 2), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: "", runtimev1.PendingHooksAnnotation: "UpdateMachine"})), + fakeMachine("m2", withMachineAnnotations(map[string]string{runtimev1.PendingHooksAnnotation: "UpdateMachine"})), // updating in place, failed to complete (to remove both UpdateInProgressAnnotation and PendingHooksAnnotation) + }, + interceptorFuncs: interceptor.Funcs{ + Get: func(_ context.Context, _ client.WithWatch, _ client.ObjectKey, _ client.Object, _ ...client.GetOption) error { + return errors.New("injected error performing get") // we should not perform any get if in place upgrade has 
been already triggered }, }, - } - infraTmpl.SetKind("GenericInfrastructureMachineTemplate") - infraTmpl.SetAPIVersion(clusterv1.GroupVersionInfrastructure.String()) - infraTmpl.SetName("ms-template") - infraTmpl.SetNamespace(metav1.NamespaceDefault) - g.Expect(r.Client.Create(context.TODO(), infraTmpl)).To(Succeed()) + wantMachinesNotInPlaceUpdating: nil, + wantMachinesInPlaceUpdating: []string{"m1", "m2"}, + wantErr: false, + }, + { + name: "No op when it fails to get InfraMachine", + ms: newMachineSet("ms1", "cluster1", 1), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: ""})), + }, + interceptorFuncs: interceptor.Funcs{ + Get: func(ctx context.Context, client client.WithWatch, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if obj.GetObjectKind().GroupVersionKind().Kind == builder.GenericInfrastructureMachineKind && key.Name == "m1" { + return errors.New("injected error when getting m1-infra") + } + return client.Get(ctx, key, obj, opts...) + }, + }, + wantMachinesNotInPlaceUpdating: []string{"m1"}, + wantErr: true, + wantErrorMessage: "injected error when getting m1-infra", + }, + { + name: "No op when it fails to compute desired InfraMachine", + ms: newMachineSet("ms1", "cluster1", 1), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: ""})), + }, + interceptorFuncs: interceptor.Funcs{ + Get: func(ctx context.Context, client client.WithWatch, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if key.Name == "ms-infra-template" { + return errors.New("injected error when getting ms-infra-template") + } + return client.Get(ctx, key, obj, opts...) + }, + }, + wantMachinesNotInPlaceUpdating: []string{"m1"}, + wantErr: true, + wantErrorMessage: "injected error when getting ms-infra-template", + }, + { + name: "No op when it fails to get BootstrapConfig", + ms: newMachineSet("ms1", "cluster1", 1), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: ""})), + }, + interceptorFuncs: interceptor.Funcs{ + Get: func(ctx context.Context, client client.WithWatch, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if obj.GetObjectKind().GroupVersionKind().Kind == builder.GenericBootstrapConfigKind && key.Name == "m1" { + return errors.New("injected error when getting m1-bootstrap") + } + return client.Get(ctx, key, obj, opts...) + }, + }, + wantMachinesNotInPlaceUpdating: []string{"m1"}, + wantErr: true, + wantErrorMessage: "injected error when getting m1-bootstrap", + }, + { + name: "No op when it fails to compute desired BootstrapConfig", + ms: newMachineSet("ms1", "cluster1", 1), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: ""})), + }, + interceptorFuncs: interceptor.Funcs{ + Get: func(ctx context.Context, client client.WithWatch, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if key.Name == "ms-bootstrap-template" { + return errors.New("injected error when getting ms-bootstrap-template") + } + return client.Get(ctx, key, obj, opts...) 
+ }, + }, + wantMachinesNotInPlaceUpdating: []string{"m1"}, + wantErr: true, + wantErrorMessage: "injected error when getting ms-bootstrap-template", + }, + { + name: "No op when it fails to update InfraMachine", + ms: newMachineSet("ms1", "cluster1", 1), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: ""})), + }, + interceptorFuncs: interceptor.Funcs{ + Apply: func(ctx context.Context, c client.WithWatch, obj runtime.ApplyConfiguration, opts ...client.ApplyOption) error { + clientObject, ok := obj.(client.Object) + if !ok { + return errors.Errorf("error during object creation: unexpected ApplyConfiguration") + } + if clientObject.GetObjectKind().GroupVersionKind().Kind == builder.GenericInfrastructureMachineKind && clientObject.GetName() == "m1" { + return errors.New("injected error when applying m1-infra") + } + return c.Apply(ctx, obj, opts...) + }, + }, + wantMachinesNotInPlaceUpdating: []string{"m1"}, + wantErr: true, + wantErrorMessage: "injected error when applying m1-infra", + }, + { + name: "No op when it fails to update bootstrap config", + ms: newMachineSet("ms1", "cluster1", 1), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: ""})), + }, + interceptorFuncs: interceptor.Funcs{ + Apply: func(ctx context.Context, c client.WithWatch, obj runtime.ApplyConfiguration, opts ...client.ApplyOption) error { + clientObject, ok := obj.(client.Object) + if !ok { + return errors.Errorf("error during object creation: unexpected ApplyConfiguration") + } + if clientObject.GetObjectKind().GroupVersionKind().Kind == builder.GenericBootstrapConfigKind && clientObject.GetName() == "m1" { + return errors.New("injected error when applying m1-bootstrap") + } + return c.Apply(ctx, obj, opts...) + }, + }, + wantMachinesNotInPlaceUpdating: []string{"m1"}, + wantErr: true, + wantErrorMessage: "injected error when applying m1-bootstrap", + }, + { + name: "No op when it fails to update machine", + ms: newMachineSet("ms1", "cluster1", 1), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: ""})), + }, + interceptorFuncs: interceptor.Funcs{ + Apply: func(ctx context.Context, c client.WithWatch, obj runtime.ApplyConfiguration, opts ...client.ApplyOption) error { + clientObject, ok := obj.(client.Object) + if !ok { + return errors.Errorf("error during object creation: unexpected ApplyConfiguration") + } + if clientObject.GetName() == "m1" { + return errors.New("injected error when applying m1") + } + return c.Apply(ctx, obj, opts...)
+ }, + }, + wantMachinesNotInPlaceUpdating: []string{"m1"}, + wantErr: true, + wantErrorMessage: "injected error when applying m1", + }, + { + name: "No op when ms is accepting moved replicas, machine is still pending acknowledge", + ms: newMachineSet("ms1", "cluster1", 1, withMachineSetAnnotations(map[string]string{clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation: "ms2"})), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: "", clusterv1.PendingAcknowledgeMoveAnnotation: ""})), + }, + interceptorFuncs: interceptor.Funcs{}, + wantMachinesNotInPlaceUpdating: []string{"m1"}, + wantErr: false, + }, + { + name: "Trigger in-place when ms is accepting moved replicas, machine is still pending acknowledge, machine is acknowledged", + ms: newMachineSet("ms1", "cluster1", 1, withMachineSetAnnotations(map[string]string{clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation: "ms2", clusterv1.AcknowledgedMoveAnnotation: "m1"})), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: "", clusterv1.PendingAcknowledgeMoveAnnotation: ""})), + }, + interceptorFuncs: interceptor.Funcs{}, + wantMachinesInPlaceUpdating: []string{"m1"}, + wantErr: false, + }, + { + name: "Trigger in-place when ms is not accepting anymore moved replicas, machine is still pending acknowledge", + ms: newMachineSet("ms1", "cluster1", 1), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: "", clusterv1.PendingAcknowledgeMoveAnnotation: ""})), + }, + interceptorFuncs: interceptor.Funcs{}, + wantMachinesInPlaceUpdating: []string{"m1"}, + wantErr: false, + }, + { + name: "Keeps triggering in-place when one machine fails", + ms: newMachineSet("ms1", "cluster1", 1), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: "", clusterv1.PendingAcknowledgeMoveAnnotation: ""})), + fakeMachine("m2", withMachineAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: "", clusterv1.PendingAcknowledgeMoveAnnotation: ""})), + fakeMachine("m3", withMachineAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: "", clusterv1.PendingAcknowledgeMoveAnnotation: ""})), + }, + interceptorFuncs: interceptor.Funcs{ + Get: func(ctx context.Context, client client.WithWatch, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if obj.GetObjectKind().GroupVersionKind().Kind == builder.GenericInfrastructureMachineKind && key.Name == "m1" { + return errors.New("injected error when getting m1-infra") + } + return client.Get(ctx, key, obj, opts...) 
+ }, + }, + wantMachinesNotInPlaceUpdating: []string{"m1"}, + wantMachinesInPlaceUpdating: []string{"m2", "m3"}, + wantErr: true, + wantErrorMessage: "injected error when getting m1-infra", + }, + { + name: "Trigger in-place for machines without bootstrap config", + ms: newMachineSet("ms1", "cluster1", 1), + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineAnnotations(map[string]string{clusterv1.UpdateInProgressAnnotation: "", clusterv1.PendingAcknowledgeMoveAnnotation: ""})), + }, + noBootStrapConfig: true, + interceptorFuncs: interceptor.Funcs{}, + wantMachinesInPlaceUpdating: []string{"m1"}, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) - s := &scope{ - cluster: testCluster, - machineSet: machineSet, - machines: []*clusterv1.Machine{}, - getAndAdoptMachinesForMachineSetSucceeded: true, - } - _, err := r.syncReplicas(ctx, s) - g.Expect(err).To(HaveOccurred()) - g.Expect(err.Error()).To(ContainSubstring("inject error for create machineInfrastructure")) - - // Verify the proper condition is set on the MachineSet. - condition := clusterv1.MachinesCreatedV1Beta1Condition - g.Expect(v1beta1conditions.Has(machineSet, condition)).To(BeTrue(), "MachineSet should have the %s condition set", condition) - - machinesCreatedCondition := v1beta1conditions.Get(machineSet, condition) - g.Expect(machinesCreatedCondition.Status). - To(Equal(corev1.ConditionFalse), "%s condition status should be %s", condition, corev1.ConditionFalse) - g.Expect(machinesCreatedCondition.Reason). - To(Equal(clusterv1.InfrastructureTemplateCloningFailedV1Beta1Reason), "%s condition reason should be %s", condition, clusterv1.InfrastructureTemplateCloningFailedV1Beta1Reason) - - // Verify no new Machines are created. 
- machineList := &clusterv1.MachineList{} - g.Expect(r.Client.List(ctx, machineList)).To(Succeed()) - g.Expect(machineList.Items).To(BeEmpty(), "There should not be any machines") - - // Verify no boostrap object created - bootstrapList := &unstructured.UnstructuredList{} - bootstrapList.SetGroupVersionKind(schema.GroupVersionKind{ - Group: bootstrapTmpl.GetObjectKind().GroupVersionKind().Group, - Version: bootstrapTmpl.GetObjectKind().GroupVersionKind().Version, - Kind: strings.TrimSuffix(bootstrapTmpl.GetObjectKind().GroupVersionKind().Kind, clusterv1.TemplateSuffix), - }) - g.Expect(r.Client.List(ctx, bootstrapList)).To(Succeed()) - g.Expect(bootstrapList.Items).To(BeEmpty(), "There should not be any bootstrap object") - - // Verify no infra object created - infraList := &unstructured.UnstructuredList{} - infraList.SetGroupVersionKind(schema.GroupVersionKind{ - Group: infraTmpl.GetObjectKind().GroupVersionKind().Group, - Version: infraTmpl.GetObjectKind().GroupVersionKind().Version, - Kind: strings.TrimSuffix(infraTmpl.GetObjectKind().GroupVersionKind().Kind, clusterv1.TemplateSuffix), - }) - g.Expect(r.Client.List(ctx, infraList)).To(Succeed()) - g.Expect(infraList.Items).To(BeEmpty(), "There should not be any infra object") - }) -} + tt.ms.Spec.Template.Spec.InfrastructureRef = clusterv1.ContractVersionedObjectReference{ + APIGroup: infraTmpl.GroupVersionKind().Group, + Kind: infraTmpl.GetKind(), + Name: infraTmpl.GetName(), + } + + objs := []client.Object{ + builder.GenericInfrastructureMachineTemplateCRD.DeepCopy(), + builder.GenericInfrastructureMachineCRD.DeepCopy(), + builder.GenericBootstrapConfigTemplateCRD.DeepCopy(), + builder.GenericBootstrapConfigCRD.DeepCopy(), + tt.ms, + infraTmpl, + } + if !tt.noBootStrapConfig { + tt.ms.Spec.Template.Spec.Bootstrap.ConfigRef = clusterv1.ContractVersionedObjectReference{ + APIGroup: bootstrapTmpl.GroupVersionKind().Group, + Kind: bootstrapTmpl.GetKind(), + Name: bootstrapTmpl.GetName(), + } + objs = append(objs, bootstrapTmpl) + } + + for _, m := range tt.machines { + m.SetNamespace(tt.ms.Namespace) + + mInfraObj := infraObj.DeepCopy() + mInfraObj.SetName(m.Name) + m.Spec.InfrastructureRef = clusterv1.ContractVersionedObjectReference{ + APIGroup: mInfraObj.GroupVersionKind().Group, + Kind: mInfraObj.GetKind(), + Name: mInfraObj.GetName(), + } + objs = append(objs, m, mInfraObj) + + if !tt.noBootStrapConfig { + mBootstrapObj := bootstrapObj.DeepCopy() + mBootstrapObj.SetName(m.Name) + m.Spec.Bootstrap.ConfigRef = clusterv1.ContractVersionedObjectReference{ + APIGroup: mBootstrapObj.GroupVersionKind().Group, + Kind: mBootstrapObj.GetKind(), + Name: mBootstrapObj.GetName(), + } + objs = append(objs, mBootstrapObj) + } + } + fakeClient := fake.NewClientBuilder().WithObjects(objs...).WithInterceptorFuncs(tt.interceptorFuncs).Build() + r := &Reconciler{ + Client: fakeClient, + recorder: record.NewFakeRecorder(32), + } + s := &scope{ + machineSet: tt.ms, + machines: tt.machines, + } + res, err := r.triggerInPlaceUpdate(ctx, s) + if tt.wantErr { + g.Expect(err).To(HaveOccurred(), "expected error when triggering in place update, got none") + g.Expect(err.Error()).To(ContainSubstring(tt.wantErrorMessage)) + } else { + g.Expect(err).ToNot(HaveOccurred(), "unexpected error when triggering in place update") + } + g.Expect(res.IsZero()).To(BeTrue(), "unexpected non zero result when triggering in place update") + + machines := &clusterv1.MachineList{} + g.Expect(fakeClient.List(ctx, machines)).ToNot(HaveOccurred(), "unexpected error when listing 
machines") + g.Expect(machinesNotInPlaceUpdating(machines)).To(ConsistOf(tt.wantMachinesNotInPlaceUpdating)) -type computeDesiredMachineTestCase struct { - name string - ms *clusterv1.MachineSet - existingMachine *clusterv1.Machine - wantMachine *clusterv1.Machine - wantName []gomegatypes.GomegaMatcher + updatingMachines := machinesInPlaceUpdating(machines) + g.Expect(updatingMachines).To(ConsistOf(tt.wantMachinesInPlaceUpdating)) + }) + } } func TestComputeDesiredMachine(t *testing.T) { @@ -2659,7 +3600,13 @@ func TestComputeDesiredMachine(t *testing.T) { expectedUpdatedMachine.Spec.InfrastructureRef = *existingMachine.Spec.InfrastructureRef.DeepCopy() expectedUpdatedMachine.Spec.Bootstrap.ConfigRef = *existingMachine.Spec.Bootstrap.ConfigRef.DeepCopy() - tests := []computeDesiredMachineTestCase{ + tests := []struct { + name string + ms *clusterv1.MachineSet + existingMachine *clusterv1.Machine + wantMachine *clusterv1.Machine + wantName []gomegatypes.GomegaMatcher + }{ { name: "should return the correct Machine object when creating a new Machine", ms: &clusterv1.MachineSet{ diff --git a/internal/controllers/machineset/machineset_deletion_order.go b/internal/controllers/machineset/machineset_deletion_order.go index 9569548b5070..0cad8c357e00 100644 --- a/internal/controllers/machineset/machineset_deletion_order.go +++ b/internal/controllers/machineset/machineset_deletion_order.go @@ -33,23 +33,34 @@ type ( ) const ( - mustDelete deletePriority = 100.0 - shouldDelete deletePriority = 75.0 - betterDelete deletePriority = 50.0 - couldDelete deletePriority = 20.0 - mustNotDelete deletePriority = 0.0 + mustDelete deletePriority = 100.0 + shouldDeleteFirst deletePriority = 80.0 + shouldDelete deletePriority = 75.0 + betterDelete deletePriority = 50.0 + couldDelete deletePriority = 20.0 + mustNotDelete deletePriority = 0.0 secondsPerTenDays float64 = 864000 ) // maps the creation timestamp onto the 0-100 priority range. func oldestDeletionOrder(machine *clusterv1.Machine) deletePriority { + // Deleting machines must go first, otherwise deletion code will delete more machines while previously deleted machines + // are still deleting. if !machine.DeletionTimestamp.IsZero() { return mustDelete } + // If user expressed the intent to delete a machines, respect it by deleting this machine first when scaling down. if _, ok := machine.Annotations[clusterv1.DeleteMachineAnnotation]; ok { + return shouldDeleteFirst + } + // If there is machine still updating in progress and the MS is scaling down, consider this machine next + // so the system avoids to complete unnecessary in-place upgrades (drop machines not at the desired state first). + if _, ok := machine.Annotations[clusterv1.UpdateInProgressAnnotation]; ok { return shouldDelete } + // If there are machines not healthy, get rid of them next, because this will unblock the rollout + // while respecting the maxUnhealthy requirement. if !isMachineHealthy(machine) { return betterDelete } @@ -64,12 +75,22 @@ func oldestDeletionOrder(machine *clusterv1.Machine) deletePriority { } func newestDeletionOrder(machine *clusterv1.Machine) deletePriority { + // Deleting machines must go first, otherwise deletion code will delete more machines while previously deleted machines + // are still deleting. if !machine.DeletionTimestamp.IsZero() { return mustDelete } + // If user expressed the intent to delete a machines, respect it by deleting this machine first when scaling down. 
if _, ok := machine.Annotations[clusterv1.DeleteMachineAnnotation]; ok { + return shouldDeleteFirst + } + // If a machine is still updating in place and the MS is scaling down, consider this machine next + // so the system avoids completing unnecessary in-place updates (drop machines not at the desired state first). + if _, ok := machine.Annotations[clusterv1.UpdateInProgressAnnotation]; ok { return shouldDelete } + // If there are unhealthy machines, get rid of them next, because this will unblock the rollout + // while respecting the maxUnhealthy requirement. if !isMachineHealthy(machine) { return betterDelete } @@ -77,12 +98,22 @@ func newestDeletionOrder(machine *clusterv1.Machine) deletePriority { } func randomDeletionOrder(machine *clusterv1.Machine) deletePriority { + // Machines that are already deleting must go first, otherwise the deletion code will delete more machines while previously deleted + // machines are still deleting. if !machine.DeletionTimestamp.IsZero() { return mustDelete } + // If the user expressed the intent to delete a machine, respect it by deleting this machine first when scaling down. if _, ok := machine.Annotations[clusterv1.DeleteMachineAnnotation]; ok { + return shouldDeleteFirst + } + // If a machine is still updating in place and the MS is scaling down, consider this machine next + // so the system avoids completing unnecessary in-place updates (drop machines not at the desired state first). + if _, ok := machine.Annotations[clusterv1.UpdateInProgressAnnotation]; ok { return shouldDelete } + // If there are unhealthy machines, get rid of them next, because this will unblock the rollout + // while respecting the maxUnhealthy requirement. if !isMachineHealthy(machine) { return betterDelete } @@ -106,10 +137,12 @@ func (m sortableMachines) Less(i, j int) bool { return priorityJ < priorityI // high to low } +func getMachinesToMovePrioritized(filteredMachines []*clusterv1.Machine, fun deletePriorityFunc) []*clusterv1.Machine { + return getMachinesToDeletePrioritized(filteredMachines, len(filteredMachines), fun) +} + func getMachinesToDeletePrioritized(filteredMachines []*clusterv1.Machine, diff int, fun deletePriorityFunc) []*clusterv1.Machine { - if diff >= len(filteredMachines) { - return filteredMachines - } else if diff <= 0 { + if diff <= 0 { return []*clusterv1.Machine{} } @@ -119,6 +152,9 @@ func getMachinesToDeletePrioritized(filteredMachines []*clusterv1.Machine, diff } sort.Sort(sortable) + if diff >= len(filteredMachines) { + return sortable.machines + } return sortable.machines[:diff] } diff --git a/internal/controllers/machineset/machineset_deletion_order_test.go b/internal/controllers/machineset/machineset_deletion_order_test.go index 44b7bd75e502..b21a65c1cfc3 100644 --- a/internal/controllers/machineset/machineset_deletion_order_test.go +++ b/internal/controllers/machineset/machineset_deletion_order_test.go @@ -27,7 +27,7 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" ) -func TestMachineToDelete(t *testing.T) { +func TestMachineRandomDelete(t *testing.T) { now := metav1.Now() nodeRef := clusterv1.MachineNodeReference{Name: "some-node"} healthyMachine := &clusterv1.Machine{Status: clusterv1.MachineStatus{NodeRef: nodeRef}} @@ -50,6 +50,10 @@ func TestMachineToDelete(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{clusterv1.DeleteMachineAnnotation: ""}}, Status: clusterv1.MachineStatus{NodeRef: nodeRef}, } + machineWithUpdateInProgressAnnotation := &clusterv1.Machine{ + ObjectMeta:
metav1.ObjectMeta{Annotations: map[string]string{clusterv1.UpdateInProgressAnnotation: ""}}, + Status: clusterv1.MachineStatus{NodeRef: nodeRef}, + } deleteMachineWithoutNodeRef := &clusterv1.Machine{} nodeHealthyConditionFalseMachine := &clusterv1.Machine{ Status: clusterv1.MachineStatus{ @@ -208,11 +212,24 @@ func TestMachineToDelete(t *testing.T) { betterDeleteMachine, deleteMachineWithMachineAnnotation, betterDeleteMachine, + machineWithUpdateInProgressAnnotation, }, expect: []*clusterv1.Machine{ deleteMachineWithMachineAnnotation, }, }, + { + desc: "func=randomDeletionOrder, UpdateInProgressAnnotation, diff=1", + diff: 1, + machines: []*clusterv1.Machine{ + betterDeleteMachine, + machineWithUpdateInProgressAnnotation, + betterDeleteMachine, + }, + expect: []*clusterv1.Machine{ + machineWithUpdateInProgressAnnotation, + }, + }, { desc: "func=randomDeletionOrder, MachineWithNoNodeRef, diff=1", diff: 1, @@ -312,6 +329,10 @@ func TestMachineNewestDelete(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{clusterv1.DeleteMachineAnnotation: ""}, CreationTimestamp: metav1.NewTime(currentTime.AddDate(0, 0, -10))}, Status: clusterv1.MachineStatus{NodeRef: nodeRef}, } + machineWithUpdateInProgressAnnotation := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{clusterv1.UpdateInProgressAnnotation: ""}, CreationTimestamp: metav1.NewTime(currentTime.AddDate(0, 0, -10))}, + Status: clusterv1.MachineStatus{NodeRef: nodeRef}, + } unhealthyMachine := &clusterv1.Machine{ ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.NewTime(currentTime.AddDate(0, 0, -10))}, Status: clusterv1.MachineStatus{ @@ -386,10 +407,18 @@ func TestMachineNewestDelete(t *testing.T) { desc: "func=newestDeletionOrder, diff=1 (DeleteMachineAnnotation)", diff: 1, machines: []*clusterv1.Machine{ - secondNewest, oldest, secondOldest, newest, deleteMachineWithMachineAnnotation, + secondNewest, oldest, secondOldest, newest, deleteMachineWithMachineAnnotation, machineWithUpdateInProgressAnnotation, }, expect: []*clusterv1.Machine{deleteMachineWithMachineAnnotation}, }, + { + desc: "func=newestDeletionOrder, diff=1 (UpdateInProgressAnnotation)", + diff: 1, + machines: []*clusterv1.Machine{ + secondNewest, oldest, secondOldest, newest, machineWithUpdateInProgressAnnotation, + }, + expect: []*clusterv1.Machine{machineWithUpdateInProgressAnnotation}, + }, { desc: "func=newestDeletionOrder, diff=1 (deleteMachineWithoutNodeRef)", diff: 1, @@ -461,6 +490,10 @@ func TestMachineOldestDelete(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{clusterv1.DeleteMachineAnnotation: ""}, CreationTimestamp: metav1.NewTime(currentTime.AddDate(0, 0, -10))}, Status: clusterv1.MachineStatus{NodeRef: nodeRef}, } + machineWithUpdateInProgressAnnotation := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{Annotations: map[string]string{clusterv1.UpdateInProgressAnnotation: ""}, CreationTimestamp: metav1.NewTime(currentTime.AddDate(0, 0, -10))}, + Status: clusterv1.MachineStatus{NodeRef: nodeRef}, + } unhealthyMachine := &clusterv1.Machine{ ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.NewTime(currentTime.AddDate(0, 0, -10))}, Status: clusterv1.MachineStatus{ @@ -571,10 +604,18 @@ func TestMachineOldestDelete(t *testing.T) { desc: "func=oldestDeletionOrder, diff=1 (DeleteMachineAnnotation)", diff: 1, machines: []*clusterv1.Machine{ - empty, secondNewest, oldest, secondOldest, newest, deleteMachineWithMachineAnnotation, + empty, secondNewest, oldest, secondOldest,
newest, deleteMachineWithMachineAnnotation, machineWithUpdateInProgressAnnotation, }, expect: []*clusterv1.Machine{deleteMachineWithMachineAnnotation}, }, + { + desc: "func=oldestDeletionOrder, diff=1 (UpdateInProgressAnnotation)", + diff: 1, + machines: []*clusterv1.Machine{ + empty, secondNewest, oldest, secondOldest, newest, machineWithUpdateInProgressAnnotation, + }, + expect: []*clusterv1.Machine{machineWithUpdateInProgressAnnotation}, + }, { desc: "func=oldestDeletionOrder, diff=1 (deleteMachineWithoutNodeRef)", diff: 1, diff --git a/internal/hooks/tracking.go b/internal/hooks/tracking.go index f32ddd90fa0f..b497cdfe6873 100644 --- a/internal/hooks/tracking.go +++ b/internal/hooks/tracking.go @@ -45,6 +45,22 @@ func MarkAsPending(ctx context.Context, c client.Client, obj client.Object, hook return errors.Wrapf(err, "failed to mark %q hook(s) as pending", strings.Join(hookNames, ",")) } + MarkObjectAsPending(obj, hooks...) + if err := patchHelper.Patch(ctx, obj); err != nil { + return errors.Wrapf(err, "failed to mark %q hook(s) as pending", strings.Join(hookNames, ",")) + } + + return nil +} + +// MarkObjectAsPending adds to the object's PendingHooksAnnotation the intent to execute a hook after an operation completes. +// Usually this function is called when an operation is starting in order to track the intent to call an After hook later in the process. +func MarkObjectAsPending(obj client.Object, hooks ...runtimecatalog.Hook) { + hookNames := []string{} + for _, hook := range hooks { + hookNames = append(hookNames, runtimecatalog.HookName(hook)) + } + // Read the annotation of the objects and add the hook to the comma separated list annotations := obj.GetAnnotations() if annotations == nil { @@ -52,12 +68,6 @@ func MarkAsPending(ctx context.Context, c client.Client, obj client.Object, hook } annotations[runtimev1.PendingHooksAnnotation] = addToCommaSeparatedList(annotations[runtimev1.PendingHooksAnnotation], hookNames...) obj.SetAnnotations(annotations) - - if err := patchHelper.Patch(ctx, obj); err != nil { - return errors.Wrapf(err, "failed to mark %q hook(s) as pending", strings.Join(hookNames, ",")) - } - - return nil } // IsPending returns true if there is an intent to call a hook being tracked in the object's PendingHooksAnnotation.
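To make the new deletion-order tiers in internal/controllers/machineset/machineset_deletion_order.go above easier to follow, here is a minimal, standalone sketch of how the priorities interact. It intentionally does not import cluster-api: the Machine struct, the annotation keys, and the age-based ramp below are simplified stand-ins for illustration, not the controller's actual types or formula.

```go
// Standalone sketch of the tiered deletion priorities
// (mustDelete > shouldDeleteFirst > shouldDelete > betterDelete > age-based).
package main

import (
	"fmt"
	"sort"
	"time"
)

// Machine is a stand-in for clusterv1.Machine, reduced to the fields the ordering looks at.
type Machine struct {
	Name        string
	Deleting    bool              // stand-in for a non-zero deletionTimestamp
	Annotations map[string]string // stand-in for metadata.annotations
	Healthy     bool
	Created     time.Time
}

const (
	deleteMachineAnnotation    = "cluster.x-k8s.io/delete-machine"     // assumed key, for illustration only
	updateInProgressAnnotation = "cluster.x-k8s.io/update-in-progress" // assumed key, for illustration only
)

func hasAnnotation(m Machine, key string) bool {
	_, ok := m.Annotations[key]
	return ok
}

// oldestPriority mirrors the tier ordering: machines already deleting first, then machines the
// user asked to delete, then machines updating in place, then unhealthy machines, then the oldest.
func oldestPriority(m Machine, now time.Time) float64 {
	switch {
	case m.Deleting:
		return 100 // mustDelete
	case hasAnnotation(m, deleteMachineAnnotation):
		return 80 // shouldDeleteFirst
	case hasAnnotation(m, updateInProgressAnnotation):
		return 75 // shouldDelete
	case !m.Healthy:
		return 50 // betterDelete
	default:
		// Simplified age ramp: older machines score higher, capped below the tiers above.
		const secondsPerTenDays = 864000
		p := now.Sub(m.Created).Seconds() / secondsPerTenDays * 50
		if p > 50 {
			p = 50
		}
		return p
	}
}

// machinesToDelete sorts by priority (high to low) and returns the first diff machines.
func machinesToDelete(machines []Machine, diff int, now time.Time) []Machine {
	if diff <= 0 {
		return nil
	}
	sorted := append([]Machine(nil), machines...)
	sort.SliceStable(sorted, func(i, j int) bool {
		return oldestPriority(sorted[i], now) > oldestPriority(sorted[j], now)
	})
	if diff > len(sorted) {
		diff = len(sorted)
	}
	return sorted[:diff]
}

func main() {
	now := time.Now()
	machines := []Machine{
		{Name: "m1", Healthy: true, Created: now.Add(-4 * time.Minute)},
		{Name: "m2", Healthy: true, Created: now.Add(-3 * time.Minute), Annotations: map[string]string{updateInProgressAnnotation: ""}},
		{Name: "m3", Healthy: true, Created: now.Add(-2 * time.Minute), Annotations: map[string]string{deleteMachineAnnotation: ""}},
		{Name: "m4", Healthy: false, Created: now.Add(-1 * time.Minute)},
	}
	for _, m := range machinesToDelete(machines, 2, now) {
		fmt.Println(m.Name) // m3 (delete annotation), then m2 (update in progress)
	}
}
```

The point of the new shouldDeleteFirst tier is the ordering it produces: a Machine carrying the delete-machine annotation outranks one that is merely updating in place, which in turn outranks healthy Machines, so a scale down prefers to drop Machines that are not at the desired state before touching the rest.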