From a535e7890e545e38fd7243c765b502a79549ba2c Mon Sep 17 00:00:00 2001 From: Yuvaraj Kakaraparthi Date: Tue, 28 Feb 2023 02:14:11 -0800 Subject: [PATCH 1/3] in-place propagation for KCP --- .../kubeadm/internal/control_plane.go | 19 +- .../internal/controllers/controller.go | 112 ++- .../internal/controllers/controller_test.go | 644 ++++++++++++++---- .../kubeadm/internal/controllers/helpers.go | 178 ++++- .../internal/controllers/helpers_test.go | 255 +++++-- .../internal/controllers/scale_test.go | 122 +++- .../internal/controllers/suite_test.go | 22 - .../internal/controllers/upgrade_test.go | 84 ++- controlplane/kubeadm/internal/filters.go | 53 +- controlplane/kubeadm/internal/filters_test.go | 24 +- .../machinedeployment_controller.go | 2 +- .../machineset/machineset_controller.go | 2 +- internal/util/ssa/managedfields.go | 2 +- internal/util/ssa/managedfields_test.go | 2 +- 14 files changed, 1145 insertions(+), 376 deletions(-) diff --git a/controlplane/kubeadm/internal/control_plane.go b/controlplane/kubeadm/internal/control_plane.go index b64a0bdaaf4f..b81f01d4e2a4 100644 --- a/controlplane/kubeadm/internal/control_plane.go +++ b/controlplane/kubeadm/internal/control_plane.go @@ -49,8 +49,8 @@ type ControlPlane struct { // TODO: we should see if we can combine these with the Machine objects so we don't have all these separate lookups // See discussion on https://github.com/kubernetes-sigs/cluster-api/pull/3405 - kubeadmConfigs map[string]*bootstrapv1.KubeadmConfig - infraResources map[string]*unstructured.Unstructured + KubeadmConfigs map[string]*bootstrapv1.KubeadmConfig + InfraResources map[string]*unstructured.Unstructured } // NewControlPlane returns an instantiated ControlPlane. @@ -77,8 +77,8 @@ func NewControlPlane(ctx context.Context, client client.Client, cluster *cluster Cluster: cluster, Machines: ownedMachines, machinesPatchHelpers: patchHelpers, - kubeadmConfigs: kubeadmConfigs, - infraResources: infraObjects, + KubeadmConfigs: kubeadmConfigs, + InfraResources: infraObjects, reconciliationTime: metav1.Now(), }, nil } @@ -158,7 +158,7 @@ func (c *ControlPlane) HasDeletingMachine() bool { // GetKubeadmConfig returns the KubeadmConfig of a given machine. func (c *ControlPlane) GetKubeadmConfig(machineName string) (*bootstrapv1.KubeadmConfig, bool) { - kubeadmConfig, ok := c.kubeadmConfigs[machineName] + kubeadmConfig, ok := c.KubeadmConfigs[machineName] return kubeadmConfig, ok } @@ -169,7 +169,7 @@ func (c *ControlPlane) MachinesNeedingRollout() collections.Machines { // Return machines if they are scheduled for rollout or if with an outdated configuration. return machines.Filter( - NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.infraResources, c.kubeadmConfigs, c.KCP), + NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP), ) } @@ -177,7 +177,7 @@ func (c *ControlPlane) MachinesNeedingRollout() collections.Machines { // plane's configuration and therefore do not require rollout. 
func (c *ControlPlane) UpToDateMachines() collections.Machines { return c.Machines.Filter( - collections.Not(NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.infraResources, c.kubeadmConfigs, c.KCP)), + collections.Not(NeedsRollout(&c.reconciliationTime, c.KCP.Spec.RolloutAfter, c.KCP.Spec.RolloutBefore, c.InfraResources, c.KubeadmConfigs, c.KCP)), ) } @@ -258,3 +258,8 @@ func (c *ControlPlane) PatchMachines(ctx context.Context) error { } return kerrors.NewAggregate(errList) } + +// SetPatchHelpers updates the patch helpers. +func (c *ControlPlane) SetPatchHelpers(patchHelpers map[string]*patch.Helper) { + c.machinesPatchHelpers = patchHelpers +} diff --git a/controlplane/kubeadm/internal/controllers/controller.go b/controlplane/kubeadm/internal/controllers/controller.go index 42be67d9e387..9be7586398d3 100644 --- a/controlplane/kubeadm/internal/controllers/controller.go +++ b/controlplane/kubeadm/internal/controllers/controller.go @@ -44,7 +44,8 @@ import ( "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/feature" - "sigs.k8s.io/cluster-api/internal/labels" + "sigs.k8s.io/cluster-api/internal/contract" + "sigs.k8s.io/cluster-api/internal/util/ssa" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/collections" @@ -55,6 +56,8 @@ import ( "sigs.k8s.io/cluster-api/util/version" ) +const kcpManagerName = "capi-kubeadmcontrolplane" + // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io;bootstrap.cluster.x-k8s.io;controlplane.cluster.x-k8s.io,resources=*,verbs=get;list;watch;create;update;patch;delete @@ -77,6 +80,12 @@ type KubeadmControlPlaneReconciler struct { managementCluster internal.ManagementCluster managementClusterUncached internal.ManagementCluster + + // disableInPlacePropagation should only be used for tests. This is used to skip + // some parts of the controller that need SSA as the current test setup does not + // support SSA. This flag should be dropped after all affected tests are migrated + // to envtest. + disableInPlacePropagation bool } func (r *KubeadmControlPlaneReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { @@ -331,25 +340,16 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster * return ctrl.Result{}, err } + if !r.disableInPlacePropagation { + if err := r.syncMachines(ctx, controlPlane); err != nil { + return ctrl.Result{}, errors.Wrap(err, "failed to sync Machines") + } + } + // Aggregate the operational state of all the machines; while aggregating we are adding the // source ref (reason@machine/name) so the problem can be easily tracked down to its source machine. conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, ownedMachines.ConditionGetters(), conditions.AddSourceRef(), conditions.WithStepCounterIf(false)) - // Ensure all required labels exist on the controlled Machines. - // This logic is needed to add the `cluster.x-k8s.io/control-plane-name` label to Machines - // which were created before the `cluster.x-k8s.io/control-plane-name` label was introduced - // or if a user manually removed the label. - // NOTE: Changes will be applied to the Machines in reconcileControlPlaneConditions. 
- // NOTE: cluster.x-k8s.io/control-plane is already set at this stage (it is used when reading controlPlane.Machines). - for i := range controlPlane.Machines { - machine := controlPlane.Machines[i] - // Note: MustEqualValue and MustFormatValue is used here as the label value can be a hash if the control plane - // name is longer than 63 characters. - if value, ok := machine.Labels[clusterv1.MachineControlPlaneNameLabel]; !ok || !labels.MustEqualValue(kcp.Name, value) { - machine.Labels[clusterv1.MachineControlPlaneNameLabel] = labels.MustFormatValue(kcp.Name) - } - } - // Updates conditions reporting the status of static pods and the status of the etcd cluster. // NOTE: Conditions reporting KCP operation progress like e.g. Resized or SpecUpToDate are inlined with the rest of the execution. if result, err := r.reconcileControlPlaneConditions(ctx, controlPlane); err != nil || !result.IsZero() { @@ -535,6 +535,86 @@ func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(o client.Ob return nil } +// syncMachines updates Machines, InfrastructureMachines and KubeadmConfigs to propagate in-place mutable fields from KCP. +// Note: It also cleans up managed fields of all Machines so that Machines that were +// created/patched before (< v1.4.0) the controller adopted Server-Side-Apply (SSA) can also work with SSA. +// Note: For InfrastructureMachines and KubeadmConfigs it also drops ownership of "metadata.labels" and +// "metadata.annotations" from "manager" so that "capi-kubeadmcontrolplane" can own these fields and can work with SSA. +// Otherwise, fields would be co-owned by our "old" "manager" and "capi-kubeadmcontrolplane" and then we would not be +// able to e.g. drop labels and annotations. +func (r *KubeadmControlPlaneReconciler) syncMachines(ctx context.Context, controlPlane *internal.ControlPlane) error { + patchHelpers := map[string]*patch.Helper{} + for machineName := range controlPlane.Machines { + m := controlPlane.Machines[machineName] + // If the machine is already being deleted, we don't need to update it. + if !m.DeletionTimestamp.IsZero() { + continue + } + + // Cleanup managed fields of all Machines. + // We do this so that Machines that were created/patched before the controller adopted Server-Side-Apply (SSA) + // (< v1.4.0) can also work with SSA. Otherwise, fields would be co-owned by our "old" "manager" and + // "capi-kubeadmcontrolplane" and then we would not be able to e.g. drop labels and annotations. + if err := ssa.CleanUpManagedFieldsForSSAAdoption(ctx, r.Client, m, kcpManagerName); err != nil { + return errors.Wrapf(err, "failed to update Machine: failed to adjust the managedFields of the Machine %s", klog.KObj(m)) + } + // Update Machine to propagate in-place mutable fields from KCP. + updatedMachine, err := r.updateMachine(ctx, m, controlPlane.KCP, controlPlane.Cluster) + if err != nil { + return errors.Wrapf(err, "failed to update Machine: %s", klog.KObj(m)) + } + controlPlane.Machines[machineName] = updatedMachine + // Since the machine is updated, re-create the patch helper so that any subsequent + // Patch calls use the correct base machine object to calculate the diffs. + // Example: reconcileControlPlaneConditions patches the machine objects in a subsequent call + // and, it should use the updated machine to calculate the diff. 
+ // Note: If the patchHelpers are not re-computed based on the new updated machines, subsequent + // Patch calls will fail because the patch will be calculated based on an outdated machine and will error + // because of outdated resourceVersion. + // TODO: This should be cleaned-up to have a more streamline way of constructing and using patchHelpers. + patchHelper, err := patch.NewHelper(updatedMachine, r.Client) + if err != nil { + return errors.Wrapf(err, "failed to create patch helper for Machine %s", klog.KObj(updatedMachine)) + } + patchHelpers[machineName] = patchHelper + + labelsAndAnnotationsManagedFieldPaths := []contract.Path{ + {"f:metadata", "f:annotations"}, + {"f:metadata", "f:labels"}, + } + infraMachine := controlPlane.InfraResources[machineName] + // Cleanup managed fields of all InfrastructureMachines to drop ownership of labels and annotations + // from "manager". We do this so that InfrastructureMachines that are created using the Create method + // can also work with SSA. Otherwise, labels and annotations would be co-owned by our "old" "manager" + // and "capi-kubeadmcontrolplane" and then we would not be able to e.g. drop labels and annotations. + if err := ssa.DropManagedFields(ctx, r.Client, infraMachine, kcpManagerName, labelsAndAnnotationsManagedFieldPaths); err != nil { + return errors.Wrapf(err, "failed to clean up managedFields of InfrastructureMachine %s", klog.KObj(infraMachine)) + } + // Update in-place mutating fields on InfrastructureMachine. + if err := r.updateExternalObject(ctx, infraMachine, controlPlane.KCP, controlPlane.Cluster); err != nil { + return errors.Wrapf(err, "failed to update InfrastructureMachine %s", klog.KObj(infraMachine)) + } + + kubeadmConfig := controlPlane.KubeadmConfigs[machineName] + // Note: Set the GroupVersionKind because updateExternalObject depends on it. + kubeadmConfig.SetGroupVersionKind(m.Spec.Bootstrap.ConfigRef.GroupVersionKind()) + // Cleanup managed fields of all KubeadmConfigs to drop ownership of labels and annotations + // from "manager". We do this so that KubeadmConfigs that are created using the Create method + // can also work with SSA. Otherwise, labels and annotations would be co-owned by our "old" "manager" + // and "capi-kubeadmcontrolplane" and then we would not be able to e.g. drop labels and annotations. + if err := ssa.DropManagedFields(ctx, r.Client, kubeadmConfig, kcpManagerName, labelsAndAnnotationsManagedFieldPaths); err != nil { + return errors.Wrapf(err, "failed to clean up managedFields of KubeadmConfig %s", klog.KObj(kubeadmConfig)) + } + // Update in-place mutating fields on BootstrapConfig. + if err := r.updateExternalObject(ctx, kubeadmConfig, controlPlane.KCP, controlPlane.Cluster); err != nil { + return errors.Wrapf(err, "failed to update KubeadmConfig %s", klog.KObj(kubeadmConfig)) + } + } + // Update the patch helpers. + controlPlane.SetPatchHelpers(patchHelpers) + return nil +} + // reconcileControlPlaneConditions is responsible of reconciling conditions reporting the status of static pods and // the status of the etcd cluster. 
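// A minimal sketch (not part of the change above) of the Server-Side-Apply call pattern that
// syncMachines relies on via updateMachine and updateExternalObject, both defined later in this
// patch: the full desired object is applied with "capi-kubeadmcontrolplane" as the field manager,
// and client.ForceOwnership resolves conflicts with fields still owned by the pre-v1.4.0 "manager".
// The package name, the applyMachine helper and the client "c" are assumptions for illustration;
// note that TypeMeta/GroupVersionKind must be populated on the applied object, which is why the
// desired objects built later in this patch set it explicitly.
package example

import (
	"context"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

func applyMachine(ctx context.Context, c client.Client, desired *clusterv1.Machine) error {
	return c.Patch(ctx, desired, client.Apply,
		client.FieldOwner("capi-kubeadmcontrolplane"), // same value as the kcpManagerName constant
		client.ForceOwnership,                         // take over fields still owned by "manager"
	)
}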
func (r *KubeadmControlPlaneReconciler) reconcileControlPlaneConditions(ctx context.Context, controlPlane *internal.ControlPlane) (ctrl.Result, error) { diff --git a/controlplane/kubeadm/internal/controllers/controller_test.go b/controlplane/kubeadm/internal/controllers/controller_test.go index 675399f14c8c..5e5598060482 100644 --- a/controlplane/kubeadm/internal/controllers/controller_test.go +++ b/controlplane/kubeadm/internal/controllers/controller_test.go @@ -52,12 +52,15 @@ import ( "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/feature" + "sigs.k8s.io/cluster-api/internal/contract" "sigs.k8s.io/cluster-api/internal/test/builder" + "sigs.k8s.io/cluster-api/internal/util/ssa" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/certs" "sigs.k8s.io/cluster-api/util/collections" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/kubeconfig" + "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/cluster-api/util/secret" ) @@ -474,7 +477,7 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) { Machines: collections.Machines{}, Workload: fakeWorkloadCluster{}, } - objs := []client.Object{fakeGenericMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()} + objs := []client.Object{builder.GenericInfrastructureMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()} for i := 0; i < 3; i++ { name := fmt.Sprintf("test-%d", i) m := &clusterv1.Machine{ @@ -540,7 +543,7 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) { Machines: collections.Machines{}, Workload: fakeWorkloadCluster{}, } - objs := []client.Object{fakeGenericMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()} + objs := []client.Object{builder.GenericInfrastructureMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()} for i := 0; i < 3; i++ { name := fmt.Sprintf("test-%d", i) m := &clusterv1.Machine{ @@ -650,7 +653,7 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) { Machines: collections.Machines{}, Workload: fakeWorkloadCluster{}, } - objs := []client.Object{fakeGenericMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()} + objs := []client.Object{builder.GenericInfrastructureMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()} for i := 0; i < 3; i++ { name := fmt.Sprintf("test-%d", i) m := &clusterv1.Machine{ @@ -732,7 +735,7 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) { Workload: fakeWorkloadCluster{}, } - fakeClient := newFakeClient(fakeGenericMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy(), fmc.Machines["test0"].DeepCopy()) + fakeClient := newFakeClient(builder.GenericInfrastructureMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy(), fmc.Machines["test0"].DeepCopy()) fmc.Reader = fakeClient recorder := record.NewFakeRecorder(32) r := &KubeadmControlPlaneReconciler{ @@ -769,11 +772,6 @@ func TestKubeadmControlPlaneReconciler_ensureOwnerReferences(t *testing.T) { crt, err := getTestCACert(key) g.Expect(err).To(BeNil()) - fmc := &fakeManagementCluster{ - Machines: collections.Machines{}, - Workload: fakeWorkloadCluster{}, - } - clusterSecret := &corev1.Secret{ // The Secret's Type is used by KCP to determine whether it is user-provided. // clusterv1.ClusterSecretType signals that the Secret is CAPI-provided. 
@@ -791,12 +789,20 @@ func TestKubeadmControlPlaneReconciler_ensureOwnerReferences(t *testing.T) { }, } + kcpOwner := *metav1.NewControllerRef(kcp, controlplanev1.GroupVersion.WithKind("KubeadmControlPlane")) + t.Run("add KCP owner for secrets with no controller reference", func(t *testing.T) { - objs := []client.Object{fakeGenericMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()} - for _, purpose := range []secret.Purpose{secret.ClusterCA, secret.FrontProxyCA, secret.ServiceAccount, secret.EtcdCA} { + objs := []client.Object{builder.GenericInfrastructureMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()} + certificates := secret.Certificates{ + {Purpose: secret.ClusterCA}, + {Purpose: secret.FrontProxyCA}, + {Purpose: secret.ServiceAccount}, + {Purpose: secret.EtcdCA}, + } + for _, c := range certificates { s := clusterSecret.DeepCopy() // Set the secret name to the purpose - s.Name = secret.Name(cluster.Name, purpose) + s.Name = secret.Name(cluster.Name, c.Purpose) // Set the Secret Type to clusterv1.ClusterSecretType which signals this Secret was generated by CAPI. s.Type = clusterv1.ClusterSecretType @@ -804,15 +810,8 @@ func TestKubeadmControlPlaneReconciler_ensureOwnerReferences(t *testing.T) { } fakeClient := newFakeClient(objs...) - fmc.Reader = fakeClient - r := &KubeadmControlPlaneReconciler{ - Client: fakeClient, - APIReader: fakeClient, - managementCluster: fmc, - managementClusterUncached: fmc, - } - _, err := r.reconcile(ctx, cluster, kcp) + err = ensureCertificatesOwnerRef(ctx, fakeClient, client.ObjectKeyFromObject(cluster), certificates, kcpOwner) g.Expect(err).To(BeNil()) secrets := &corev1.SecretList{} @@ -823,11 +822,17 @@ func TestKubeadmControlPlaneReconciler_ensureOwnerReferences(t *testing.T) { }) t.Run("replace non-KCP controller with KCP controller reference", func(t *testing.T) { - objs := []client.Object{fakeGenericMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()} - for _, purpose := range []secret.Purpose{secret.ClusterCA, secret.FrontProxyCA, secret.ServiceAccount, secret.EtcdCA} { + objs := []client.Object{builder.GenericInfrastructureMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()} + certificates := secret.Certificates{ + {Purpose: secret.ClusterCA}, + {Purpose: secret.FrontProxyCA}, + {Purpose: secret.ServiceAccount}, + {Purpose: secret.EtcdCA}, + } + for _, c := range certificates { s := clusterSecret.DeepCopy() // Set the secret name to the purpose - s.Name = secret.Name(cluster.Name, purpose) + s.Name = secret.Name(cluster.Name, c.Purpose) // Set the Secret Type to clusterv1.ClusterSecretType which signals this Secret was generated by CAPI. s.Type = clusterv1.ClusterSecretType @@ -846,15 +851,7 @@ func TestKubeadmControlPlaneReconciler_ensureOwnerReferences(t *testing.T) { } fakeClient := newFakeClient(objs...) 
- fmc.Reader = fakeClient - r := &KubeadmControlPlaneReconciler{ - Client: fakeClient, - APIReader: fakeClient, - managementCluster: fmc, - managementClusterUncached: fmc, - } - - _, err := r.reconcile(ctx, cluster, kcp) + err := ensureCertificatesOwnerRef(ctx, fakeClient, client.ObjectKeyFromObject(cluster), certificates, kcpOwner) g.Expect(err).To(BeNil()) secrets := &corev1.SecretList{} @@ -867,11 +864,17 @@ func TestKubeadmControlPlaneReconciler_ensureOwnerReferences(t *testing.T) { t.Run("does not add owner reference to user-provided secrets", func(t *testing.T) { g := NewWithT(t) - objs := []client.Object{fakeGenericMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()} - for _, purpose := range []secret.Purpose{secret.ClusterCA, secret.FrontProxyCA, secret.ServiceAccount, secret.EtcdCA} { + objs := []client.Object{builder.GenericInfrastructureMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()} + certificates := secret.Certificates{ + {Purpose: secret.ClusterCA}, + {Purpose: secret.FrontProxyCA}, + {Purpose: secret.ServiceAccount}, + {Purpose: secret.EtcdCA}, + } + for _, c := range certificates { s := clusterSecret.DeepCopy() // Set the secret name to the purpose - s.Name = secret.Name(cluster.Name, purpose) + s.Name = secret.Name(cluster.Name, c.Purpose) // Set the Secret Type to any type which signals this Secret was is user-provided. s.Type = corev1.SecretTypeOpaque // Set the a controller owner reference of an unknown type on the secret. @@ -891,15 +894,7 @@ func TestKubeadmControlPlaneReconciler_ensureOwnerReferences(t *testing.T) { } fakeClient := newFakeClient(objs...) - fmc.Reader = fakeClient - r := &KubeadmControlPlaneReconciler{ - Client: fakeClient, - APIReader: fakeClient, - managementCluster: fmc, - managementClusterUncached: fmc, - } - - _, err := r.reconcile(ctx, cluster, kcp) + err := ensureCertificatesOwnerRef(ctx, fakeClient, client.ObjectKeyFromObject(cluster), certificates, kcpOwner) g.Expect(err).To(BeNil()) secrets := &corev1.SecretList{} @@ -1126,21 +1121,44 @@ func TestReconcileCertificateExpiries(t *testing.T) { } func TestReconcileInitializeControlPlane(t *testing.T) { + setup := func(t *testing.T, g *WithT) *corev1.Namespace { + t.Helper() + + t.Log("Creating the namespace") + ns, err := env.CreateNamespace(ctx, "test-kcp-reconcile-initializecontrolplane") + g.Expect(err).To(BeNil()) + + return ns + } + + teardown := func(t *testing.T, g *WithT, ns *corev1.Namespace) { + t.Helper() + + t.Log("Deleting the namespace") + g.Expect(env.Delete(ctx, ns)).To(Succeed()) + } + g := NewWithT(t) + namespace := setup(t, g) + defer teardown(t, g, namespace) - cluster := newCluster(&types.NamespacedName{Name: "foo", Namespace: metav1.NamespaceDefault}) + cluster := newCluster(&types.NamespacedName{Name: "foo", Namespace: namespace.Name}) cluster.Spec = clusterv1.ClusterSpec{ ControlPlaneEndpoint: clusterv1.APIEndpoint{ Host: "test.local", Port: 9999, }, } + g.Expect(env.Create(ctx, cluster)).To(Succeed()) + patchHelper, err := patch.NewHelper(cluster, env) + g.Expect(err).To(BeNil()) cluster.Status = clusterv1.ClusterStatus{InfrastructureReady: true} + g.Expect(patchHelper.Patch(ctx, cluster)).To(Succeed()) - genericMachineTemplate := &unstructured.Unstructured{ + genericInfrastructureMachineTemplate := &unstructured.Unstructured{ Object: map[string]interface{}{ - "kind": "GenericMachineTemplate", - "apiVersion": "generic.io/v1", + "kind": "GenericInfrastructureMachineTemplate", + "apiVersion": 
"infrastructure.cluster.x-k8s.io/v1beta1", "metadata": map[string]interface{}{ "name": "infra-foo", "namespace": cluster.Namespace, @@ -1154,6 +1172,7 @@ func TestReconcileInitializeControlPlane(t *testing.T) { }, }, } + g.Expect(env.Create(ctx, genericInfrastructureMachineTemplate)).To(Succeed()) kcp := &controlplanev1.KubeadmControlPlane{ ObjectMeta: metav1.ObjectMeta{ @@ -1164,6 +1183,7 @@ func TestReconcileInitializeControlPlane(t *testing.T) { Kind: "Cluster", APIVersion: clusterv1.GroupVersion.String(), Name: cluster.Name, + UID: cluster.UID, }, }, }, @@ -1172,32 +1192,32 @@ func TestReconcileInitializeControlPlane(t *testing.T) { Version: "v1.16.6", MachineTemplate: controlplanev1.KubeadmControlPlaneMachineTemplate{ InfrastructureRef: corev1.ObjectReference{ - Kind: genericMachineTemplate.GetKind(), - APIVersion: genericMachineTemplate.GetAPIVersion(), - Name: genericMachineTemplate.GetName(), + Kind: genericInfrastructureMachineTemplate.GetKind(), + APIVersion: genericInfrastructureMachineTemplate.GetAPIVersion(), + Name: genericInfrastructureMachineTemplate.GetName(), Namespace: cluster.Namespace, }, }, KubeadmConfigSpec: bootstrapv1.KubeadmConfigSpec{}, }, } - kcp.Default() - g.Expect(kcp.ValidateCreate()).To(Succeed()) + g.Expect(env.Create(ctx, kcp)).To(Succeed()) corednsCM := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: "coredns", - Namespace: metav1.NamespaceSystem, + Namespace: namespace.Name, }, Data: map[string]string{ "Corefile": "original-core-file", }, } + g.Expect(env.Create(ctx, corednsCM)).To(Succeed()) kubeadmCM := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: "kubeadm-config", - Namespace: metav1.NamespaceSystem, + Namespace: namespace.Name, }, Data: map[string]string{ "ClusterConfiguration": `apiServer: @@ -1208,15 +1228,25 @@ kind: ClusterConfiguration kubernetesVersion: metav1.16.1`, }, } + g.Expect(env.Create(ctx, kubeadmCM)).To(Succeed()) + corednsDepl := &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ Name: "coredns", - Namespace: metav1.NamespaceSystem, + Namespace: namespace.Name, }, Spec: appsv1.DeploymentSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "coredns": "", + }, + }, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Name: "coredns", + Labels: map[string]string{ + "coredns": "", + }, }, Spec: corev1.PodSpec{ Containers: []corev1.Container{{ @@ -1227,88 +1257,471 @@ kubernetesVersion: metav1.16.1`, }, }, } + g.Expect(env.Create(ctx, corednsDepl)).To(Succeed()) - fakeClient := newFakeClient( - fakeGenericMachineTemplateCRD, - kcp.DeepCopy(), - cluster.DeepCopy(), - genericMachineTemplate.DeepCopy(), - corednsCM.DeepCopy(), - kubeadmCM.DeepCopy(), - corednsDepl.DeepCopy(), - ) expectedLabels := map[string]string{clusterv1.ClusterNameLabel: "foo"} r := &KubeadmControlPlaneReconciler{ - Client: fakeClient, - APIReader: fakeClient, + Client: env, + APIReader: env.GetAPIReader(), recorder: record.NewFakeRecorder(32), managementCluster: &fakeManagementCluster{ - Management: &internal.Management{Client: fakeClient}, + Management: &internal.Management{Client: env}, Workload: fakeWorkloadCluster{ Workload: &internal.Workload{ - Client: fakeClient, + Client: env, }, Status: internal.ClusterStatus{}, }, }, managementClusterUncached: &fakeManagementCluster{ - Management: &internal.Management{Client: fakeClient}, + Management: &internal.Management{Client: env}, Workload: fakeWorkloadCluster{ Workload: &internal.Workload{ - Client: fakeClient, + Client: env, }, Status: internal.ClusterStatus{}, 
}, }, + disableInPlacePropagation: true, } result, err := r.Reconcile(ctx, ctrl.Request{NamespacedName: util.ObjectKey(kcp)}) g.Expect(err).NotTo(HaveOccurred()) // this first requeue is to add finalizer g.Expect(result).To(Equal(ctrl.Result{})) - g.Expect(r.Client.Get(ctx, util.ObjectKey(kcp), kcp)).To(Succeed()) + g.Expect(r.APIReader.Get(ctx, util.ObjectKey(kcp), kcp)).To(Succeed()) g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer)) - result, err = r.Reconcile(ctx, ctrl.Request{NamespacedName: util.ObjectKey(kcp)}) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(result).To(Equal(ctrl.Result{Requeue: true})) - g.Expect(r.Client.Get(ctx, client.ObjectKey{Name: kcp.Name, Namespace: kcp.Namespace}, kcp)).To(Succeed()) - // Expect the referenced infrastructure template to have a Cluster Owner Reference. - g.Expect(fakeClient.Get(ctx, util.ObjectKey(genericMachineTemplate), genericMachineTemplate)).To(Succeed()) - g.Expect(genericMachineTemplate.GetOwnerReferences()).To(ContainElement(metav1.OwnerReference{ - APIVersion: clusterv1.GroupVersion.String(), - Kind: "Cluster", - Name: cluster.Name, - })) + g.Eventually(func(g Gomega) { + _, err = r.Reconcile(ctx, ctrl.Request{NamespacedName: util.ObjectKey(kcp)}) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(r.APIReader.Get(ctx, client.ObjectKey{Name: kcp.Name, Namespace: kcp.Namespace}, kcp)).To(Succeed()) + // Expect the referenced infrastructure template to have a Cluster Owner Reference. + g.Expect(env.GetAPIReader().Get(ctx, util.ObjectKey(genericInfrastructureMachineTemplate), genericInfrastructureMachineTemplate)).To(Succeed()) + g.Expect(genericInfrastructureMachineTemplate.GetOwnerReferences()).To(ContainElement(metav1.OwnerReference{ + APIVersion: clusterv1.GroupVersion.String(), + Kind: "Cluster", + Name: cluster.Name, + UID: cluster.UID, + })) - // Always expect that the Finalizer is set on the passed in resource - g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer)) + // Always expect that the Finalizer is set on the passed in resource + g.Expect(kcp.Finalizers).To(ContainElement(controlplanev1.KubeadmControlPlaneFinalizer)) - g.Expect(kcp.Status.Selector).NotTo(BeEmpty()) - g.Expect(kcp.Status.Replicas).To(BeEquivalentTo(1)) - g.Expect(conditions.IsFalse(kcp, controlplanev1.AvailableCondition)).To(BeTrue()) + g.Expect(kcp.Status.Selector).NotTo(BeEmpty()) + g.Expect(kcp.Status.Replicas).To(BeEquivalentTo(1)) + g.Expect(conditions.IsFalse(kcp, controlplanev1.AvailableCondition)).To(BeTrue()) - s, err := secret.GetFromNamespacedName(ctx, fakeClient, client.ObjectKey{Namespace: metav1.NamespaceDefault, Name: "foo"}, secret.ClusterCA) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(s).NotTo(BeNil()) - g.Expect(s.Data).NotTo(BeEmpty()) - g.Expect(s.Labels).To(Equal(expectedLabels)) + s, err := secret.GetFromNamespacedName(ctx, env, client.ObjectKey{Namespace: cluster.Namespace, Name: "foo"}, secret.ClusterCA) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(s).NotTo(BeNil()) + g.Expect(s.Data).NotTo(BeEmpty()) + g.Expect(s.Labels).To(Equal(expectedLabels)) - k, err := kubeconfig.FromSecret(ctx, fakeClient, util.ObjectKey(cluster)) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(k).NotTo(BeEmpty()) + k, err := kubeconfig.FromSecret(ctx, env, util.ObjectKey(cluster)) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(k).NotTo(BeEmpty()) - machineList := &clusterv1.MachineList{} - g.Expect(fakeClient.List(ctx, machineList, 
client.InNamespace(metav1.NamespaceDefault))).To(Succeed()) - g.Expect(machineList.Items).To(HaveLen(1)) + machineList := &clusterv1.MachineList{} + g.Expect(env.GetAPIReader().List(ctx, machineList, client.InNamespace(cluster.Namespace))).To(Succeed()) + g.Expect(machineList.Items).To(HaveLen(1)) - machine := machineList.Items[0] - g.Expect(machine.Name).To(HavePrefix(kcp.Name)) - // Newly cloned infra objects should have the infraref annotation. - infraObj, err := external.Get(ctx, r.Client, &machine.Spec.InfrastructureRef, machine.Spec.InfrastructureRef.Namespace) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(infraObj.GetAnnotations()).To(HaveKeyWithValue(clusterv1.TemplateClonedFromNameAnnotation, genericMachineTemplate.GetName())) - g.Expect(infraObj.GetAnnotations()).To(HaveKeyWithValue(clusterv1.TemplateClonedFromGroupKindAnnotation, genericMachineTemplate.GroupVersionKind().GroupKind().String())) + machine := machineList.Items[0] + g.Expect(machine.Name).To(HavePrefix(kcp.Name)) + // Newly cloned infra objects should have the infraref annotation. + infraObj, err := external.Get(ctx, r.Client, &machine.Spec.InfrastructureRef, machine.Spec.InfrastructureRef.Namespace) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(infraObj.GetAnnotations()).To(HaveKeyWithValue(clusterv1.TemplateClonedFromNameAnnotation, genericInfrastructureMachineTemplate.GetName())) + g.Expect(infraObj.GetAnnotations()).To(HaveKeyWithValue(clusterv1.TemplateClonedFromGroupKindAnnotation, genericInfrastructureMachineTemplate.GroupVersionKind().GroupKind().String())) + }, 30*time.Second).Should(Succeed()) +} + +func TestKubeadmControlPlaneReconciler_syncMachines(t *testing.T) { + setup := func(t *testing.T, g *WithT) (*corev1.Namespace, *clusterv1.Cluster) { + t.Helper() + + t.Log("Creating the namespace") + ns, err := env.CreateNamespace(ctx, "test-kcp-reconciler-sync-machines") + g.Expect(err).To(BeNil()) + + t.Log("Creating the Cluster") + cluster := &clusterv1.Cluster{ObjectMeta: metav1.ObjectMeta{Namespace: ns.Name, Name: "test-cluster"}} + g.Expect(env.Create(ctx, cluster)).To(Succeed()) + + t.Log("Creating the Cluster Kubeconfig Secret") + g.Expect(env.CreateKubeconfigSecret(ctx, cluster)).To(Succeed()) + + return ns, cluster + } + + teardown := func(t *testing.T, g *WithT, ns *corev1.Namespace, cluster *clusterv1.Cluster) { + t.Helper() + + t.Log("Deleting the Cluster") + g.Expect(env.Delete(ctx, cluster)).To(Succeed()) + t.Log("Deleting the namespace") + g.Expect(env.Delete(ctx, ns)).To(Succeed()) + } + + g := NewWithT(t) + namespace, testCluster := setup(t, g) + defer teardown(t, g, namespace, testCluster) + + classicManager := "manager" + duration5s := &metav1.Duration{Duration: 5 * time.Second} + duration10s := &metav1.Duration{Duration: 10 * time.Second} + + // Existing InfraMachine + infraMachineSpec := map[string]interface{}{ + "infra-field": "infra-value", + } + existingInfraMachine := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "kind": "GenericInfrastructureMachine", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", + "metadata": map[string]interface{}{ + "name": "existing-inframachine", + "namespace": testCluster.Namespace, + "labels": map[string]string{ + "preserved-label": "preserved-value", + "dropped-label": "dropped-value", + "modified-label": "modified-value", + }, + "annotations": map[string]string{ + "preserved-annotation": "preserved-value", + "dropped-annotation": "dropped-value", + "modified-annotation": "modified-value", + }, + }, + "spec": 
infraMachineSpec, + }, + } + infraMachineRef := &corev1.ObjectReference{ + Kind: "GenericInfrastructureMachine", + Namespace: namespace.Name, + Name: "existing-inframachine", + APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", + } + // Note: use "manager" as the field owner to mimic the manager used before ClusterAPI v1.4.0. + g.Expect(env.Create(ctx, existingInfraMachine, client.FieldOwner("manager"))).To(Succeed()) + + // Existing KubeadmConfig + bootstrapSpec := &bootstrapv1.KubeadmConfigSpec{ + Users: []bootstrapv1.User{{Name: "test-user"}}, + JoinConfiguration: &bootstrapv1.JoinConfiguration{}, + } + existingKubeadmConfig := &bootstrapv1.KubeadmConfig{ + TypeMeta: metav1.TypeMeta{ + Kind: "KubeadmConfig", + APIVersion: bootstrapv1.GroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "existing-kubeadmconfig", + Namespace: namespace.Name, + Labels: map[string]string{ + "preserved-label": "preserved-value", + "dropped-label": "dropped-value", + "modified-label": "modified-value", + }, + Annotations: map[string]string{ + "preserved-annotation": "preserved-value", + "dropped-annotation": "dropped-value", + "modified-annotation": "modified-value", + }, + }, + Spec: *bootstrapSpec, + } + bootstrapRef := &corev1.ObjectReference{ + Kind: "KubeadmConfig", + Namespace: namespace.Name, + Name: "existing-kubeadmconfig", + APIVersion: bootstrapv1.GroupVersion.String(), + } + // Note: use "manager" as the field owner to mimic the manager used before ClusterAPI v1.4.0. + g.Expect(env.Create(ctx, existingKubeadmConfig, client.FieldOwner("manager"))).To(Succeed()) + + // Existing Machine to validate in-place mutation + fd := pointer.String("fd1") + inPlaceMutatingMachine := &clusterv1.Machine{ + TypeMeta: metav1.TypeMeta{ + Kind: "Machine", + APIVersion: clusterv1.GroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "existing-machine", + Namespace: namespace.Name, + Labels: map[string]string{ + "preserved-label": "preserved-value", + "dropped-label": "dropped-value", + "modified-label": "modified-value", + }, + Annotations: map[string]string{ + "preserved-annotation": "preserved-value", + "dropped-annotation": "dropped-value", + "modified-annotation": "modified-value", + }, + }, + Spec: clusterv1.MachineSpec{ + ClusterName: testCluster.Name, + Bootstrap: clusterv1.Bootstrap{ + ConfigRef: bootstrapRef, + }, + InfrastructureRef: *infraMachineRef, + Version: pointer.String("v1.25.3"), + FailureDomain: fd, + ProviderID: pointer.String("provider-id"), + NodeDrainTimeout: duration5s, + NodeVolumeDetachTimeout: duration5s, + NodeDeletionTimeout: duration5s, + }, + } + // Note: use "manager" as the field owner to mimic the manager used before ClusterAPI v1.4.0. 
+ g.Expect(env.Create(ctx, inPlaceMutatingMachine, client.FieldOwner("manager"))).To(Succeed()) + + // Existing machine that is in deleting state + deletingMachine := &clusterv1.Machine{ + TypeMeta: metav1.TypeMeta{ + APIVersion: clusterv1.GroupVersion.String(), + Kind: "Machine", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "deleting-machine", + Namespace: namespace.Name, + Labels: map[string]string{}, + Annotations: map[string]string{}, + Finalizers: []string{"testing-finalizer"}, + }, + Spec: clusterv1.MachineSpec{ + ClusterName: testCluster.Name, + InfrastructureRef: corev1.ObjectReference{ + Namespace: namespace.Name, + }, + Bootstrap: clusterv1.Bootstrap{ + DataSecretName: pointer.String("machine-bootstrap-secret"), + }, + }, + } + g.Expect(env.Create(ctx, deletingMachine, client.FieldOwner(classicManager))).To(Succeed()) + // Delete the machine to put it in the deleting state + g.Expect(env.Delete(ctx, deletingMachine)).To(Succeed()) + // Wait till the machine is marked for deletion + g.Eventually(func() bool { + if err := env.Get(ctx, client.ObjectKeyFromObject(deletingMachine), deletingMachine); err != nil { + return false + } + return !deletingMachine.DeletionTimestamp.IsZero() + }, 30*time.Second).Should(BeTrue()) + + kcp := &controlplanev1.KubeadmControlPlane{ + TypeMeta: metav1.TypeMeta{ + Kind: "KubeadmControlPlane", + APIVersion: controlplanev1.GroupVersion.String(), + }, + ObjectMeta: metav1.ObjectMeta{ + UID: types.UID("abc-123-control-plane"), + Name: "existing-kcp", + Namespace: namespace.Name, + }, + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Version: "v1.26.1", + KubeadmConfigSpec: *bootstrapSpec, + MachineTemplate: controlplanev1.KubeadmControlPlaneMachineTemplate{ + ObjectMeta: clusterv1.ObjectMeta{ + Labels: map[string]string{ + "preserved-label": "preserved-value", // Label will be preserved while testing in-place mutation. + "dropped-label": "dropped-value", // Label will be dropped while testing in-place mutation. + "modified-label": "modified-value", // Label value will be modified while testing in-place mutation. + }, + Annotations: map[string]string{ + "preserved-annotation": "preserved-value", // Annotation will be preserved while testing in-place mutation. + "dropped-annotation": "dropped-value", // Annotation will be dropped while testing in-place mutation. + "modified-annotation": "modified-value", // Annotation value will be modified while testing in-place mutation. + }, + }, + InfrastructureRef: corev1.ObjectReference{ + Kind: "GenericInfrastructureMachineTemplate", + Namespace: namespace.Name, + Name: "infra-foo", + APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", + }, + NodeDrainTimeout: duration5s, + NodeVolumeDetachTimeout: duration5s, + NodeDeletionTimeout: duration5s, + }, + }, + } + + controlPlane := &internal.ControlPlane{ + KCP: kcp, + Cluster: testCluster, + Machines: collections.Machines{ + inPlaceMutatingMachine.Name: inPlaceMutatingMachine, + deletingMachine.Name: deletingMachine, + }, + KubeadmConfigs: map[string]*bootstrapv1.KubeadmConfig{ + inPlaceMutatingMachine.Name: existingKubeadmConfig, + deletingMachine.Name: nil, + }, + InfraResources: map[string]*unstructured.Unstructured{ + inPlaceMutatingMachine.Name: existingInfraMachine, + deletingMachine.Name: nil, + }, + } + + // + // Verify Managed Fields + // + + // Run syncMachines to clean up managed fields and have proper field ownership + // for Machines, InfrastructureMachines and KubeadmConfigs. 
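// A small illustrative sketch (the package name, printFieldManagers and its arguments are
// assumptions, not part of this patch): how field ownership can be inspected directly. Every
// managedFields entry records a manager name, an operation ("Update" for the classic patch path,
// "Apply" for SSA) and a FieldsV1 set such as {"f:metadata":{"f:labels":{"f:preserved-label":{}}}},
// which is the structure that the contract.Path{"f:metadata", "f:labels"} paths used in this patch
// and the ssa.MatchManagedFieldsEntry / ssa.MatchFieldOwnership assertions below refer to.
package example

import (
	"context"
	"fmt"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

func printFieldManagers(ctx context.Context, r client.Reader, key client.ObjectKey) error {
	m := &clusterv1.Machine{}
	if err := r.Get(ctx, key, m); err != nil {
		return err
	}
	for _, entry := range m.GetManagedFields() {
		// After syncMachines has run, this prints e.g. "manager=capi-kubeadmcontrolplane operation=Apply".
		fmt.Printf("manager=%s operation=%s\n", entry.Manager, entry.Operation)
	}
	return nil
}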
+ reconciler := &KubeadmControlPlaneReconciler{Client: env} + g.Expect(reconciler.syncMachines(ctx, controlPlane)).To(Succeed()) + + // The inPlaceMutatingMachine should have cleaned up managed fields. + updatedInplaceMutatingMachine := inPlaceMutatingMachine.DeepCopy() + g.Expect(env.GetAPIReader().Get(ctx, client.ObjectKeyFromObject(updatedInplaceMutatingMachine), updatedInplaceMutatingMachine)) + // Verify ManagedFields + g.Expect(updatedInplaceMutatingMachine.ManagedFields).Should( + ContainElement(ssa.MatchManagedFieldsEntry(kcpManagerName, metav1.ManagedFieldsOperationApply)), + "in-place mutable machine should contain an entry for SSA manager", + ) + g.Expect(updatedInplaceMutatingMachine.ManagedFields).ShouldNot( + ContainElement(ssa.MatchManagedFieldsEntry(classicManager, metav1.ManagedFieldsOperationUpdate)), + "in-place mutable machine should not contain an entry for old manager", + ) + + // The InfrastructureMachine should have ownership of "labels" and "annotations" transferred to + // "capi-kubeadmcontrolplane" manager. + updatedInfraMachine := existingInfraMachine.DeepCopy() + g.Expect(env.GetAPIReader().Get(ctx, client.ObjectKeyFromObject(updatedInfraMachine), updatedInfraMachine)) + + // Verify ManagedFields + g.Expect(updatedInfraMachine.GetManagedFields()).Should( + ssa.MatchFieldOwnership(kcpManagerName, metav1.ManagedFieldsOperationApply, contract.Path{"f:metadata", "f:labels"})) + g.Expect(updatedInfraMachine.GetManagedFields()).Should( + ssa.MatchFieldOwnership(kcpManagerName, metav1.ManagedFieldsOperationApply, contract.Path{"f:metadata", "f:annotations"})) + g.Expect(updatedInfraMachine.GetManagedFields()).ShouldNot( + ssa.MatchFieldOwnership(classicManager, metav1.ManagedFieldsOperationUpdate, contract.Path{"f:metadata", "f:labels"})) + g.Expect(updatedInfraMachine.GetManagedFields()).ShouldNot( + ssa.MatchFieldOwnership(classicManager, metav1.ManagedFieldsOperationUpdate, contract.Path{"f:metadata", "f:annotations"})) + // Verify ownership of other fields is not changed. + g.Expect(updatedInfraMachine.GetManagedFields()).Should( + ssa.MatchFieldOwnership(classicManager, metav1.ManagedFieldsOperationUpdate, contract.Path{"f:spec"})) + + // The KubeadmConfig should have ownership of "labels" and "annotations" transferred to + // "capi-kubeadmcontrolplane" manager. + updatedKubeadmConfig := existingKubeadmConfig.DeepCopy() + g.Expect(env.GetAPIReader().Get(ctx, client.ObjectKeyFromObject(updatedKubeadmConfig), updatedKubeadmConfig)) + + // Verify ManagedFields + g.Expect(updatedKubeadmConfig.GetManagedFields()).Should( + ssa.MatchFieldOwnership(kcpManagerName, metav1.ManagedFieldsOperationApply, contract.Path{"f:metadata", "f:labels"})) + g.Expect(updatedKubeadmConfig.GetManagedFields()).Should( + ssa.MatchFieldOwnership(kcpManagerName, metav1.ManagedFieldsOperationApply, contract.Path{"f:metadata", "f:annotations"})) + g.Expect(updatedKubeadmConfig.GetManagedFields()).ShouldNot( + ssa.MatchFieldOwnership(classicManager, metav1.ManagedFieldsOperationUpdate, contract.Path{"f:metadata", "f:labels"})) + g.Expect(updatedKubeadmConfig.GetManagedFields()).ShouldNot( + ssa.MatchFieldOwnership(classicManager, metav1.ManagedFieldsOperationUpdate, contract.Path{"f:metadata", "f:annotations"})) + // Verify ownership of other fields is not changed. 
+ g.Expect(updatedKubeadmConfig.GetManagedFields()).Should( + ssa.MatchFieldOwnership(classicManager, metav1.ManagedFieldsOperationUpdate, contract.Path{"f:spec"})) + + // + // Verify In-place mutating fields + // + + // Update KCP and verify the in-place mutating fields are propagated. + kcp.Spec.MachineTemplate.ObjectMeta.Labels = map[string]string{ + "preserved-label": "preserved-value", // Keep the label and value as is + "modified-label": "modified-value-2", // Modify the value of the label + // Drop "dropped-label" + } + expectedLabels := map[string]string{ + "preserved-label": "preserved-value", + "modified-label": "modified-value-2", + clusterv1.ClusterNameLabel: testCluster.Name, + clusterv1.MachineControlPlaneLabel: "", + clusterv1.MachineControlPlaneNameLabel: kcp.Name, + } + kcp.Spec.MachineTemplate.ObjectMeta.Annotations = map[string]string{ + "preserved-annotation": "preserved-value", // Keep the annotation and value as is + "modified-annotation": "modified-value-2", // Modify the value of the annotation + // Drop "dropped-annotation" + } + kcp.Spec.MachineTemplate.NodeDrainTimeout = duration10s + kcp.Spec.MachineTemplate.NodeDeletionTimeout = duration10s + kcp.Spec.MachineTemplate.NodeVolumeDetachTimeout = duration10s + + // Use the updated KCP. + controlPlane.KCP = kcp + g.Expect(reconciler.syncMachines(ctx, controlPlane)).To(Succeed()) + + // Verify in-place mutable fields are updated on the Machine. + updatedInplaceMutatingMachine = inPlaceMutatingMachine.DeepCopy() + g.Expect(env.GetAPIReader().Get(ctx, client.ObjectKeyFromObject(updatedInplaceMutatingMachine), updatedInplaceMutatingMachine)) + // Verify Labels + g.Expect(updatedInplaceMutatingMachine.Labels).Should(Equal(expectedLabels)) + // Verify Annotations + g.Expect(updatedInplaceMutatingMachine.Annotations).Should(Equal(kcp.Spec.MachineTemplate.ObjectMeta.Annotations)) + // Verify Node timeout values + g.Expect(updatedInplaceMutatingMachine.Spec.NodeDrainTimeout).Should(And( + Not(BeNil()), + HaveValue(Equal(*kcp.Spec.MachineTemplate.NodeDrainTimeout)), + )) + g.Expect(updatedInplaceMutatingMachine.Spec.NodeDeletionTimeout).Should(And( + Not(BeNil()), + HaveValue(Equal(*kcp.Spec.MachineTemplate.NodeDeletionTimeout)), + )) + g.Expect(updatedInplaceMutatingMachine.Spec.NodeVolumeDetachTimeout).Should(And( + Not(BeNil()), + HaveValue(Equal(*kcp.Spec.MachineTemplate.NodeVolumeDetachTimeout)), + )) + // Verify that the non in-place mutating fields remain the same. 
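// Only labels, annotations and the three Node*Timeout fields are propagated in place by
// syncMachines; the version, failure domain, provider ID, infrastructure ref and bootstrap ref of
// an existing Machine are left untouched, which is what the remaining assertions verify.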
+ g.Expect(updatedInplaceMutatingMachine.Spec.FailureDomain).Should(Equal(inPlaceMutatingMachine.Spec.FailureDomain)) + g.Expect(updatedInplaceMutatingMachine.Spec.ProviderID).Should(Equal(inPlaceMutatingMachine.Spec.ProviderID)) + g.Expect(updatedInplaceMutatingMachine.Spec.Version).Should(Equal(inPlaceMutatingMachine.Spec.Version)) + g.Expect(updatedInplaceMutatingMachine.Spec.InfrastructureRef).Should(Equal(inPlaceMutatingMachine.Spec.InfrastructureRef)) + g.Expect(updatedInplaceMutatingMachine.Spec.Bootstrap).Should(Equal(inPlaceMutatingMachine.Spec.Bootstrap)) + + // Verify in-place mutable fields are updated on InfrastructureMachine + updatedInfraMachine = existingInfraMachine.DeepCopy() + g.Expect(env.GetAPIReader().Get(ctx, client.ObjectKeyFromObject(updatedInfraMachine), updatedInfraMachine)) + // Verify Labels + g.Expect(updatedInfraMachine.GetLabels()).Should(Equal(expectedLabels)) + // Verify Annotations + g.Expect(updatedInfraMachine.GetAnnotations()).Should(Equal(kcp.Spec.MachineTemplate.ObjectMeta.Annotations)) + // Verify spec remains the same + g.Expect(updatedInfraMachine.Object).Should(HaveKeyWithValue("spec", infraMachineSpec)) + + // Verify in-place mutable fields are updated on the KubeadmConfig. + updatedKubeadmConfig = existingKubeadmConfig.DeepCopy() + g.Expect(env.GetAPIReader().Get(ctx, client.ObjectKeyFromObject(updatedKubeadmConfig), updatedKubeadmConfig)) + // Verify Labels + g.Expect(updatedKubeadmConfig.GetLabels()).Should(Equal(expectedLabels)) + // Verify Annotations + g.Expect(updatedKubeadmConfig.GetAnnotations()).Should(Equal(kcp.Spec.MachineTemplate.ObjectMeta.Annotations)) + // Verify spec remains the same + g.Expect(updatedKubeadmConfig.Spec).Should(Equal(existingKubeadmConfig.Spec)) + + // The deleting machine should not change. + updatedDeletingMachine := deletingMachine.DeepCopy() + g.Expect(env.GetAPIReader().Get(ctx, client.ObjectKeyFromObject(updatedDeletingMachine), updatedDeletingMachine)) + + // Verify ManagedFields + g.Expect(updatedDeletingMachine.ManagedFields).ShouldNot( + ContainElement(ssa.MatchManagedFieldsEntry(kcpManagerName, metav1.ManagedFieldsOperationApply)), + "deleting machine should not contain an entry for SSA manager", + ) + g.Expect(updatedDeletingMachine.ManagedFields).Should( + ContainElement(ssa.MatchManagedFieldsEntry("manager", metav1.ManagedFieldsOperationUpdate)), + "in-place mutable machine should still contain an entry for old manager", + ) + + // Verify the machine labels and annotations are unchanged. + g.Expect(updatedDeletingMachine.Labels).Should(Equal(deletingMachine.Labels)) + g.Expect(updatedDeletingMachine.Annotations).Should(Equal(deletingMachine.Annotations)) + // Verify the machine spec is unchanged. 
+ g.Expect(updatedDeletingMachine.Spec).Should(Equal(deletingMachine.Spec)) } func TestKubeadmControlPlaneReconciler_updateCoreDNS(t *testing.T) { @@ -1811,10 +2224,10 @@ func createClusterWithControlPlane(namespace string) (*clusterv1.Cluster, *contr Spec: controlplanev1.KubeadmControlPlaneSpec{ MachineTemplate: controlplanev1.KubeadmControlPlaneMachineTemplate{ InfrastructureRef: corev1.ObjectReference{ - Kind: "GenericMachineTemplate", + Kind: "GenericInfrastructureMachineTemplate", Namespace: namespace, Name: "infra-foo", - APIVersion: "generic.io/v1", + APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", }, }, Replicas: pointer.Int32(int32(3)), @@ -1832,22 +2245,17 @@ func createClusterWithControlPlane(namespace string) (*clusterv1.Cluster, *contr genericMachineTemplate := &unstructured.Unstructured{ Object: map[string]interface{}{ - "kind": "GenericMachineTemplate", - "apiVersion": "generic.io/v1", + "kind": "GenericInfrastructureMachineTemplate", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", "metadata": map[string]interface{}{ "name": "infra-foo", "namespace": namespace, - "ownerReferences": []interface{}{ - map[string]interface{}{ - "apiVersion": clusterv1.GroupVersion.String(), - "kind": "Cluster", - "name": kcpName, - }, - }, }, "spec": map[string]interface{}{ "template": map[string]interface{}{ - "spec": map[string]interface{}{}, + "spec": map[string]interface{}{ + "hello": "world", + }, }, }, }, diff --git a/controlplane/kubeadm/internal/controllers/helpers.go b/controlplane/kubeadm/internal/controllers/helpers.go index bae5e7918b2d..03a70934f434 100644 --- a/controlplane/kubeadm/internal/controllers/helpers.go +++ b/controlplane/kubeadm/internal/controllers/helpers.go @@ -26,10 +26,12 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/types" kerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apiserver/pkg/storage/names" "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" @@ -213,7 +215,7 @@ func (r *KubeadmControlPlaneReconciler) cloneConfigsAndGenerateMachine(ctx conte // Only proceed to generating the Machine if we haven't encountered an error if len(errs) == 0 { - if err := r.generateMachine(ctx, kcp, cluster, infraRef, bootstrapRef, failureDomain); err != nil { + if err := r.createMachine(ctx, kcp, cluster, infraRef, bootstrapRef, failureDomain); err != nil { conditions.MarkFalse(kcp, controlplanev1.MachinesCreatedCondition, controlplanev1.MachineGenerationFailedReason, clusterv1.ConditionSeverityError, err.Error()) errs = append(errs, errors.Wrap(err, "failed to create Machine")) @@ -288,60 +290,170 @@ func (r *KubeadmControlPlaneReconciler) generateKubeadmConfig(ctx context.Contex return bootstrapRef, nil } -func (r *KubeadmControlPlaneReconciler) generateMachine(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane, cluster *clusterv1.Cluster, infraRef, bootstrapRef *corev1.ObjectReference, failureDomain *string) error { - machine := &clusterv1.Machine{ +// updateExternalObject updates the external object with the labels and annotations from KCP. 
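// Because the unstructured object applied below carries only identifying metadata plus labels and
// annotations, the kcpManagerName field manager claims exactly those fields: a label or annotation
// key it applied previously and now omits is removed, while the spec, still owned by other
// managers, is left untouched. Setting the UID ensures the apply can only update an existing
// object and never create a new one.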
+func (r *KubeadmControlPlaneReconciler) updateExternalObject(ctx context.Context, obj client.Object, kcp *controlplanev1.KubeadmControlPlane, cluster *clusterv1.Cluster) error { + updatedObject := &unstructured.Unstructured{} + updatedObject.SetGroupVersionKind(obj.GetObjectKind().GroupVersionKind()) + updatedObject.SetNamespace(obj.GetNamespace()) + updatedObject.SetName(obj.GetName()) + // Set the UID to ensure that Server-Side-Apply only performs an update + // and does not perform an accidental create. + updatedObject.SetUID(obj.GetUID()) + + // Update labels + updatedObject.SetLabels(internal.ControlPlaneMachineLabelsForCluster(kcp, cluster.Name)) + // Update annotations + updatedObject.SetAnnotations(kcp.Spec.MachineTemplate.ObjectMeta.Annotations) + + patchOptions := []client.PatchOption{ + client.ForceOwnership, + client.FieldOwner(kcpManagerName), + } + if err := r.Client.Patch(ctx, updatedObject, client.Apply, patchOptions...); err != nil { + return errors.Wrapf(err, "failed to update %s", klog.KObj(obj)) + } + return nil +} + +func (r *KubeadmControlPlaneReconciler) createMachine(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane, cluster *clusterv1.Cluster, infraRef, bootstrapRef *corev1.ObjectReference, failureDomain *string) error { + machine, err := r.computeDesiredMachine(kcp, cluster, infraRef, bootstrapRef, failureDomain, nil) + if err != nil { + return errors.Wrap(err, "failed to create Machine: failed to compute desired Machine") + } + patchOptions := []client.PatchOption{ + client.ForceOwnership, + client.FieldOwner(kcpManagerName), + } + if err := r.Client.Patch(ctx, machine, client.Apply, patchOptions...); err != nil { + return errors.Wrap(err, "failed to create Machine: apply failed") + } + // Remove the annotation tracking that a remediation is in progress (the remediation completed when + // the replacement machine has been created above). + delete(kcp.Annotations, controlplanev1.RemediationInProgressAnnotation) + return nil +} + +func (r *KubeadmControlPlaneReconciler) updateMachine(ctx context.Context, machine *clusterv1.Machine, kcp *controlplanev1.KubeadmControlPlane, cluster *clusterv1.Cluster) (*clusterv1.Machine, error) { + updatedMachine, err := r.computeDesiredMachine( + kcp, cluster, + &machine.Spec.InfrastructureRef, machine.Spec.Bootstrap.ConfigRef, + machine.Spec.FailureDomain, machine, + ) + if err != nil { + return nil, errors.Wrap(err, "failed to update Machine: failed to compute desired Machine") + } + patchOptions := []client.PatchOption{ + client.ForceOwnership, + client.FieldOwner(kcpManagerName), + } + if err := r.Client.Patch(ctx, updatedMachine, client.Apply, patchOptions...); err != nil { + return nil, errors.Wrap(err, "failed to update Machine: apply failed") + } + return updatedMachine, nil +} + +// computeDesiredMachine computes the desired Machine. +// This Machine will be used during reconciliation to: +// * create a new Machine +// * update an existing Machine +// Because we are using Server-Side-Apply we always have to calculate the full object. +// There are small differences in how we calculate the Machine depending on if it +// is a create or update. Example: for a new Machine we have to calculate a new name, +// while for an existing Machine we have to use the name of the existing Machine. 
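// A practical consequence of Server-Side-Apply worth keeping in mind when reading the function
// below: every apply has to contain the complete set of fields this manager wants to own, because
// a field it set in a previous apply and omits in the next one is pruned by the API server. That
// is why updateMachine above recomputes the whole desired Machine instead of patching only the
// fields that changed.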
+func (r *KubeadmControlPlaneReconciler) computeDesiredMachine(kcp *controlplanev1.KubeadmControlPlane, cluster *clusterv1.Cluster, infraRef, bootstrapRef *corev1.ObjectReference, failureDomain *string, existingMachine *clusterv1.Machine) (*clusterv1.Machine, error) { + var machineName string + var machineUID types.UID + var version *string + annotations := map[string]string{} + if existingMachine == nil { + // Creating a new machine + machineName = names.SimpleNameGenerator.GenerateName(kcp.Name + "-") + version = &kcp.Spec.Version + + // Machine's bootstrap config may be missing ClusterConfiguration if it is not the first machine in the control plane. + // We store ClusterConfiguration as annotation here to detect any changes in KCP ClusterConfiguration and rollout the machine if any. + // Nb. This annotation is read when comparing the KubeadmConfig to check if a machine needs to be rolled out. + clusterConfig, err := json.Marshal(kcp.Spec.KubeadmConfigSpec.ClusterConfiguration) + if err != nil { + return nil, errors.Wrap(err, "failed to marshal cluster configuration") + } + annotations[controlplanev1.KubeadmClusterConfigurationAnnotation] = string(clusterConfig) + + // In case this machine is being created as a consequence of a remediation, then add an annotation + // tracking remediating data. + // NOTE: This is required in order to track remediation retries. + if remediationData, ok := kcp.Annotations[controlplanev1.RemediationInProgressAnnotation]; ok { + annotations[controlplanev1.RemediationForAnnotation] = remediationData + } + } else { + // Updating an existing machine + machineName = existingMachine.Name + machineUID = existingMachine.UID + version = existingMachine.Spec.Version + + // For existing machine only set the ClusterConfiguration annotation if the machine already has it. + // We should not add the annotation if it was missing in the first place because we do not have enough + // information. + if clusterConfig, ok := existingMachine.Annotations[controlplanev1.KubeadmClusterConfigurationAnnotation]; ok { + annotations[controlplanev1.KubeadmClusterConfigurationAnnotation] = clusterConfig + } + + // If the machine already has remediation data then preserve it. + // NOTE: This is required in order to track remediation retries. + if remediationData, ok := existingMachine.Annotations[controlplanev1.RemediationForAnnotation]; ok { + annotations[controlplanev1.RemediationForAnnotation] = remediationData + } + } + + // Construct the basic Machine. + desiredMachine := &clusterv1.Machine{ + TypeMeta: metav1.TypeMeta{ + APIVersion: clusterv1.GroupVersion.String(), + Kind: "Machine", + }, ObjectMeta: metav1.ObjectMeta{ - Name: names.SimpleNameGenerator.GenerateName(kcp.Name + "-"), - Namespace: kcp.Namespace, - Labels: internal.ControlPlaneMachineLabelsForCluster(kcp, cluster.Name), - Annotations: map[string]string{}, + UID: machineUID, + Name: machineName, + Namespace: kcp.Namespace, // Note: by setting the ownerRef on creation we signal to the Machine controller that this is not a stand-alone Machine. 
OwnerReferences: []metav1.OwnerReference{ *metav1.NewControllerRef(kcp, controlplanev1.GroupVersion.WithKind("KubeadmControlPlane")), }, + Labels: map[string]string{}, + Annotations: map[string]string{}, }, Spec: clusterv1.MachineSpec{ ClusterName: cluster.Name, - Version: &kcp.Spec.Version, + Version: version, + FailureDomain: failureDomain, InfrastructureRef: *infraRef, Bootstrap: clusterv1.Bootstrap{ ConfigRef: bootstrapRef, }, - FailureDomain: failureDomain, - NodeDrainTimeout: kcp.Spec.MachineTemplate.NodeDrainTimeout, - NodeDeletionTimeout: kcp.Spec.MachineTemplate.NodeDeletionTimeout, - NodeVolumeDetachTimeout: kcp.Spec.MachineTemplate.NodeVolumeDetachTimeout, }, } - // In case this machine is being created as a consequence of a remediation, then add an annotation - // tracking remediating data. - // NOTE: This is required in order to track remediation retries. - if remediationData, ok := kcp.Annotations[controlplanev1.RemediationInProgressAnnotation]; ok { - machine.Annotations[controlplanev1.RemediationForAnnotation] = remediationData - } + // Set the in-place mutable fields. + // When we create a new Machine we will just create the Machine with those fields. + // When we update an existing Machine will we update the fields on the existing Machine (in-place mutate). - // Machine's bootstrap config may be missing ClusterConfiguration if it is not the first machine in the control plane. - // We store ClusterConfiguration as annotation here to detect any changes in KCP ClusterConfiguration and rollout the machine if any. - clusterConfig, err := json.Marshal(kcp.Spec.KubeadmConfigSpec.ClusterConfiguration) - if err != nil { - return errors.Wrap(err, "failed to marshal cluster configuration") - } + // Set labels + desiredMachine.Labels = internal.ControlPlaneMachineLabelsForCluster(kcp, cluster.Name) + // Set annotations // Add the annotations from the MachineTemplate. // Note: we intentionally don't use the map directly to ensure we don't modify the map in KCP. for k, v := range kcp.Spec.MachineTemplate.ObjectMeta.Annotations { - machine.Annotations[k] = v + desiredMachine.Annotations[k] = v } - machine.Annotations[controlplanev1.KubeadmClusterConfigurationAnnotation] = string(clusterConfig) - - if err := r.Client.Create(ctx, machine); err != nil { - return errors.Wrap(err, "failed to create machine") + for k, v := range annotations { + desiredMachine.Annotations[k] = v } - // Remove the annotation tracking that a remediation is in progress (the remediation completed when - // the replacement machine has been created above). 
- delete(kcp.Annotations, controlplanev1.RemediationInProgressAnnotation) + // Set other in-place mutable fields + desiredMachine.Spec.NodeDrainTimeout = kcp.Spec.MachineTemplate.NodeDrainTimeout + desiredMachine.Spec.NodeDeletionTimeout = kcp.Spec.MachineTemplate.NodeDeletionTimeout + desiredMachine.Spec.NodeVolumeDetachTimeout = kcp.Spec.MachineTemplate.NodeVolumeDetachTimeout - return nil + return desiredMachine, nil } diff --git a/controlplane/kubeadm/internal/controllers/helpers_test.go b/controlplane/kubeadm/internal/controllers/helpers_test.go index 39a76fdb6799..a5775b46d38b 100644 --- a/controlplane/kubeadm/internal/controllers/helpers_test.go +++ b/controlplane/kubeadm/internal/controllers/helpers_test.go @@ -24,6 +24,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" "k8s.io/utils/pointer" ctrl "sigs.k8s.io/controller-runtime" @@ -33,7 +34,6 @@ import ( bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" "sigs.k8s.io/cluster-api/controllers/external" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" - "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal" "sigs.k8s.io/cluster-api/internal/test/builder" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/kubeconfig" @@ -337,19 +337,38 @@ func TestKubeadmControlPlaneReconciler_reconcileKubeconfig(t *testing.T) { } func TestCloneConfigsAndGenerateMachine(t *testing.T) { + setup := func(t *testing.T, g *WithT) *corev1.Namespace { + t.Helper() + + t.Log("Creating the namespace") + ns, err := env.CreateNamespace(ctx, "test-applykubeadmconfig") + g.Expect(err).To(BeNil()) + + return ns + } + + teardown := func(t *testing.T, g *WithT, ns *corev1.Namespace) { + t.Helper() + + t.Log("Deleting the namespace") + g.Expect(env.Delete(ctx, ns)).To(Succeed()) + } + g := NewWithT(t) + namespace := setup(t, g) + defer teardown(t, g, namespace) cluster := &clusterv1.Cluster{ ObjectMeta: metav1.ObjectMeta{ Name: "foo", - Namespace: metav1.NamespaceDefault, + Namespace: namespace.Name, }, } - genericMachineTemplate := &unstructured.Unstructured{ + genericInfrastructureMachineTemplate := &unstructured.Unstructured{ Object: map[string]interface{}{ - "kind": "GenericMachineTemplate", - "apiVersion": "generic.io/v1", + "kind": "GenericInfrastructureMachineTemplate", + "apiVersion": "infrastructure.cluster.x-k8s.io/v1beta1", "metadata": map[string]interface{}{ "name": "infra-foo", "namespace": cluster.Namespace, @@ -363,18 +382,20 @@ func TestCloneConfigsAndGenerateMachine(t *testing.T) { }, }, } + g.Expect(env.Create(ctx, genericInfrastructureMachineTemplate)).To(Succeed()) kcp := &controlplanev1.KubeadmControlPlane{ ObjectMeta: metav1.ObjectMeta{ Name: "kcp-foo", Namespace: cluster.Namespace, + UID: "abc-123-kcp-control-plane", }, Spec: controlplanev1.KubeadmControlPlaneSpec{ MachineTemplate: controlplanev1.KubeadmControlPlaneMachineTemplate{ InfrastructureRef: corev1.ObjectReference{ - Kind: genericMachineTemplate.GetKind(), - APIVersion: genericMachineTemplate.GetAPIVersion(), - Name: genericMachineTemplate.GetName(), + Kind: genericInfrastructureMachineTemplate.GetKind(), + APIVersion: genericInfrastructureMachineTemplate.GetAPIVersion(), + Name: genericInfrastructureMachineTemplate.GetName(), Namespace: cluster.Namespace, }, }, @@ -382,10 +403,8 @@ func TestCloneConfigsAndGenerateMachine(t *testing.T) { }, } - fakeClient := 
newFakeClient(cluster.DeepCopy(), kcp.DeepCopy(), genericMachineTemplate.DeepCopy()) - r := &KubeadmControlPlaneReconciler{ - Client: fakeClient, + Client: env, recorder: record.NewFakeRecorder(32), } @@ -395,7 +414,7 @@ func TestCloneConfigsAndGenerateMachine(t *testing.T) { g.Expect(r.cloneConfigsAndGenerateMachine(ctx, cluster, kcp, bootstrapSpec, nil)).To(Succeed()) machineList := &clusterv1.MachineList{} - g.Expect(fakeClient.List(ctx, machineList, client.InNamespace(cluster.Namespace))).To(Succeed()) + g.Expect(env.GetAPIReader().List(ctx, machineList, client.InNamespace(cluster.Namespace))).To(Succeed()) g.Expect(machineList.Items).To(HaveLen(1)) for _, m := range machineList.Items { @@ -405,13 +424,13 @@ func TestCloneConfigsAndGenerateMachine(t *testing.T) { infraObj, err := external.Get(ctx, r.Client, &m.Spec.InfrastructureRef, m.Spec.InfrastructureRef.Namespace) g.Expect(err).NotTo(HaveOccurred()) - g.Expect(infraObj.GetAnnotations()).To(HaveKeyWithValue(clusterv1.TemplateClonedFromNameAnnotation, genericMachineTemplate.GetName())) - g.Expect(infraObj.GetAnnotations()).To(HaveKeyWithValue(clusterv1.TemplateClonedFromGroupKindAnnotation, genericMachineTemplate.GroupVersionKind().GroupKind().String())) + g.Expect(infraObj.GetAnnotations()).To(HaveKeyWithValue(clusterv1.TemplateClonedFromNameAnnotation, genericInfrastructureMachineTemplate.GetName())) + g.Expect(infraObj.GetAnnotations()).To(HaveKeyWithValue(clusterv1.TemplateClonedFromGroupKindAnnotation, genericInfrastructureMachineTemplate.GroupVersionKind().GroupKind().String())) g.Expect(m.Spec.InfrastructureRef.Namespace).To(Equal(cluster.Namespace)) - g.Expect(m.Spec.InfrastructureRef.Name).To(HavePrefix(genericMachineTemplate.GetName())) - g.Expect(m.Spec.InfrastructureRef.APIVersion).To(Equal(genericMachineTemplate.GetAPIVersion())) - g.Expect(m.Spec.InfrastructureRef.Kind).To(Equal("GenericMachine")) + g.Expect(m.Spec.InfrastructureRef.Name).To(HavePrefix(genericInfrastructureMachineTemplate.GetName())) + g.Expect(m.Spec.InfrastructureRef.APIVersion).To(Equal(genericInfrastructureMachineTemplate.GetAPIVersion())) + g.Expect(m.Spec.InfrastructureRef.Kind).To(Equal("GenericInfrastructureMachine")) g.Expect(m.Spec.Bootstrap.ConfigRef.Namespace).To(Equal(cluster.Namespace)) g.Expect(m.Spec.Bootstrap.ConfigRef.Name).To(HavePrefix(kcp.Name)) @@ -489,10 +508,7 @@ func TestCloneConfigsAndGenerateMachineFail(t *testing.T) { })) } -func TestKubeadmControlPlaneReconciler_generateMachine(t *testing.T) { - g := NewWithT(t) - fakeClient := newFakeClient() - +func TestKubeadmControlPlaneReconciler_computeDesiredMachine(t *testing.T) { cluster := &clusterv1.Cluster{ ObjectMeta: metav1.ObjectMeta{ Name: "testCluster", @@ -500,6 +516,9 @@ func TestKubeadmControlPlaneReconciler_generateMachine(t *testing.T) { }, } + duration5s := &metav1.Duration{Duration: 5 * time.Second} + duration10s := &metav1.Duration{Duration: 10 * time.Second} + kcpMachineTemplateObjectMeta := clusterv1.ObjectMeta{ Labels: map[string]string{ "machineTemplateLabel": "machineTemplateLabelValue", @@ -508,24 +527,28 @@ func TestKubeadmControlPlaneReconciler_generateMachine(t *testing.T) { "machineTemplateAnnotation": "machineTemplateAnnotationValue", }, } + kcpMachineTemplateObjectMetaCopy := kcpMachineTemplateObjectMeta.DeepCopy() kcp := &controlplanev1.KubeadmControlPlane{ ObjectMeta: metav1.ObjectMeta{ Name: "testControlPlane", Namespace: cluster.Namespace, - Annotations: map[string]string{ - controlplanev1.RemediationInProgressAnnotation: "foo", - }, }, Spec: 
controlplanev1.KubeadmControlPlaneSpec{ Version: "v1.16.6", MachineTemplate: controlplanev1.KubeadmControlPlaneMachineTemplate{ ObjectMeta: kcpMachineTemplateObjectMeta, - NodeVolumeDetachTimeout: &metav1.Duration{Duration: 10 * time.Second}, - NodeDeletionTimeout: &metav1.Duration{Duration: 10 * time.Second}, - NodeDrainTimeout: &metav1.Duration{Duration: 10 * time.Second}, + NodeDrainTimeout: duration5s, + NodeDeletionTimeout: duration5s, + NodeVolumeDetachTimeout: duration5s, + }, + KubeadmConfigSpec: bootstrapv1.KubeadmConfigSpec{ + ClusterConfiguration: &bootstrapv1.ClusterConfiguration{ + ClusterName: "testCluster", + }, }, }, } + clusterConfigurationString := "{\"etcd\":{},\"networking\":{},\"apiServer\":{},\"controllerManager\":{},\"scheduler\":{},\"dns\":{},\"clusterName\":\"testCluster\"}" infraRef := &corev1.ObjectReference{ Kind: "InfraKind", @@ -539,53 +562,139 @@ func TestKubeadmControlPlaneReconciler_generateMachine(t *testing.T) { Name: "bootstrap", Namespace: cluster.Namespace, } - expectedMachineSpec := clusterv1.MachineSpec{ - ClusterName: cluster.Name, - Version: pointer.String(kcp.Spec.Version), - Bootstrap: clusterv1.Bootstrap{ - ConfigRef: bootstrapRef.DeepCopy(), - }, - InfrastructureRef: *infraRef.DeepCopy(), - NodeVolumeDetachTimeout: &metav1.Duration{Duration: 10 * time.Second}, - NodeDeletionTimeout: &metav1.Duration{Duration: 10 * time.Second}, - NodeDrainTimeout: &metav1.Duration{Duration: 10 * time.Second}, - } - r := &KubeadmControlPlaneReconciler{ - Client: fakeClient, - managementCluster: &internal.Management{Client: fakeClient}, - recorder: record.NewFakeRecorder(32), - } - g.Expect(r.generateMachine(ctx, kcp, cluster, infraRef, bootstrapRef, nil)).To(Succeed()) - machineList := &clusterv1.MachineList{} - g.Expect(fakeClient.List(ctx, machineList, client.InNamespace(cluster.Namespace))).To(Succeed()) - g.Expect(machineList.Items).To(HaveLen(1)) - machine := machineList.Items[0] - g.Expect(machine.Name).To(HavePrefix(kcp.Name)) - g.Expect(machine.Namespace).To(Equal(kcp.Namespace)) - g.Expect(machine.OwnerReferences).To(HaveLen(1)) - g.Expect(machine.OwnerReferences).To(ContainElement(*metav1.NewControllerRef(kcp, controlplanev1.GroupVersion.WithKind("KubeadmControlPlane")))) - g.Expect(machine.Annotations).To(HaveKeyWithValue(controlplanev1.RemediationForAnnotation, "foo")) - g.Expect(kcp.Annotations).ToNot(HaveKey(controlplanev1.RemediationInProgressAnnotation)) - g.Expect(machine.Spec).To(Equal(expectedMachineSpec)) - - // Verify that the machineTemplate.ObjectMeta has been propagated to the Machine. - for k, v := range kcpMachineTemplateObjectMeta.Labels { - g.Expect(machine.Labels[k]).To(Equal(v)) - } - g.Expect(machine.Labels[clusterv1.ClusterNameLabel]).To(Equal(cluster.Name)) - g.Expect(machine.Labels[clusterv1.MachineControlPlaneLabel]).To(Equal("")) - g.Expect(machine.Labels[clusterv1.MachineControlPlaneNameLabel]).To(Equal(kcp.Name)) - - for k, v := range kcpMachineTemplateObjectMeta.Annotations { - g.Expect(machine.Annotations[k]).To(Equal(v)) - } - - // Verify that machineTemplate.ObjectMeta in KCP has not been modified. 
- g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Labels).NotTo(HaveKey(clusterv1.ClusterNameLabel)) - g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Labels).NotTo(HaveKey(clusterv1.MachineControlPlaneLabel)) - g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Labels).NotTo(HaveKey(clusterv1.MachineControlPlaneNameLabel)) - g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Annotations).NotTo(HaveKey(controlplanev1.KubeadmClusterConfigurationAnnotation)) + t.Run("should return the correct Machine object when creating a new Machine", func(t *testing.T) { + g := NewWithT(t) + + failureDomain := pointer.String("fd1") + createdMachine, err := (&KubeadmControlPlaneReconciler{}).computeDesiredMachine( + kcp, cluster, + infraRef, bootstrapRef, + failureDomain, nil, + ) + g.Expect(err).To(BeNil()) + + expectedMachineSpec := clusterv1.MachineSpec{ + ClusterName: cluster.Name, + Version: pointer.String(kcp.Spec.Version), + Bootstrap: clusterv1.Bootstrap{ + ConfigRef: bootstrapRef, + }, + InfrastructureRef: *infraRef, + FailureDomain: failureDomain, + NodeDrainTimeout: kcp.Spec.MachineTemplate.NodeDrainTimeout, + NodeDeletionTimeout: kcp.Spec.MachineTemplate.NodeDeletionTimeout, + NodeVolumeDetachTimeout: kcp.Spec.MachineTemplate.NodeVolumeDetachTimeout, + } + g.Expect(createdMachine.Name).To(HavePrefix(kcp.Name)) + g.Expect(createdMachine.Namespace).To(Equal(kcp.Namespace)) + g.Expect(createdMachine.OwnerReferences).To(HaveLen(1)) + g.Expect(createdMachine.OwnerReferences).To(ContainElement(*metav1.NewControllerRef(kcp, controlplanev1.GroupVersion.WithKind("KubeadmControlPlane")))) + g.Expect(createdMachine.Spec).To(Equal(expectedMachineSpec)) + + // Verify that the machineTemplate.ObjectMeta has been propagated to the Machine. + // Verify labels. + expectedLabels := map[string]string{} + for k, v := range kcpMachineTemplateObjectMeta.Labels { + expectedLabels[k] = v + } + expectedLabels[clusterv1.ClusterNameLabel] = cluster.Name + expectedLabels[clusterv1.MachineControlPlaneLabel] = "" + expectedLabels[clusterv1.MachineControlPlaneNameLabel] = kcp.Name + g.Expect(createdMachine.Labels).To(Equal(expectedLabels)) + // Verify annotations. + expectedAnnotations := map[string]string{} + for k, v := range kcpMachineTemplateObjectMeta.Annotations { + expectedAnnotations[k] = v + } + expectedAnnotations[controlplanev1.KubeadmClusterConfigurationAnnotation] = clusterConfigurationString + g.Expect(createdMachine.Annotations).To(Equal(expectedAnnotations)) + + // Verify that machineTemplate.ObjectMeta in KCP has not been modified. + g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Labels).To(Equal(kcpMachineTemplateObjectMetaCopy.Labels)) + g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Annotations).To(Equal(kcpMachineTemplateObjectMetaCopy.Annotations)) + }) + + t.Run("should return the correct Machine object when updating an existing Machine", func(t *testing.T) { + g := NewWithT(t) + + machineName := "existing-machine" + machineUID := types.UID("abc-123-existing-machine") + // Use different ClusterConfiguration string than the information present in KCP + // to verify that for an existing machine we do not override this information. 
+ existingClusterConfigurationString := "existing-cluster-configuration-information" + remediationData := "remediation-data" + failureDomain := pointer.String("fd-1") + machineVersion := pointer.String("v1.25.3") + existingMachine := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: machineName, + UID: machineUID, + Annotations: map[string]string{ + controlplanev1.KubeadmClusterConfigurationAnnotation: existingClusterConfigurationString, + controlplanev1.RemediationForAnnotation: remediationData, + }, + }, + Spec: clusterv1.MachineSpec{ + Version: machineVersion, + FailureDomain: failureDomain, + NodeDrainTimeout: duration10s, + NodeDeletionTimeout: duration10s, + NodeVolumeDetachTimeout: duration10s, + }, + } + + updatedMachine, err := (&KubeadmControlPlaneReconciler{}).computeDesiredMachine( + kcp, cluster, + infraRef, bootstrapRef, + existingMachine.Spec.FailureDomain, existingMachine, + ) + g.Expect(err).To(BeNil()) + + expectedMachineSpec := clusterv1.MachineSpec{ + ClusterName: cluster.Name, + Version: machineVersion, // Should use the Machine version and not the version from KCP. + Bootstrap: clusterv1.Bootstrap{ + ConfigRef: bootstrapRef, + }, + InfrastructureRef: *infraRef, + FailureDomain: failureDomain, + NodeDrainTimeout: kcp.Spec.MachineTemplate.NodeDrainTimeout, + NodeDeletionTimeout: kcp.Spec.MachineTemplate.NodeDeletionTimeout, + NodeVolumeDetachTimeout: kcp.Spec.MachineTemplate.NodeVolumeDetachTimeout, + } + g.Expect(updatedMachine.Namespace).To(Equal(kcp.Namespace)) + g.Expect(updatedMachine.OwnerReferences).To(HaveLen(1)) + g.Expect(updatedMachine.OwnerReferences).To(ContainElement(*metav1.NewControllerRef(kcp, controlplanev1.GroupVersion.WithKind("KubeadmControlPlane")))) + g.Expect(updatedMachine.Spec).To(Equal(expectedMachineSpec)) + + // Verify the Name and UID of the Machine remain unchanged + g.Expect(updatedMachine.Name).To(Equal(machineName)) + g.Expect(updatedMachine.UID).To(Equal(machineUID)) + + // Verify that the machineTemplate.ObjectMeta has been propagated to the Machine. + // Verify labels. + expectedLabels := map[string]string{} + for k, v := range kcpMachineTemplateObjectMeta.Labels { + expectedLabels[k] = v + } + expectedLabels[clusterv1.ClusterNameLabel] = cluster.Name + expectedLabels[clusterv1.MachineControlPlaneLabel] = "" + expectedLabels[clusterv1.MachineControlPlaneNameLabel] = kcp.Name + g.Expect(updatedMachine.Labels).To(Equal(expectedLabels)) + // Verify annotations. + expectedAnnotations := map[string]string{} + for k, v := range kcpMachineTemplateObjectMeta.Annotations { + expectedAnnotations[k] = v + } + expectedAnnotations[controlplanev1.KubeadmClusterConfigurationAnnotation] = existingClusterConfigurationString + expectedAnnotations[controlplanev1.RemediationForAnnotation] = remediationData + g.Expect(updatedMachine.Annotations).To(Equal(expectedAnnotations)) + + // Verify that machineTemplate.ObjectMeta in KCP has not been modified. 
+ g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Labels).To(Equal(kcpMachineTemplateObjectMetaCopy.Labels)) + g.Expect(kcp.Spec.MachineTemplate.ObjectMeta.Annotations).To(Equal(kcpMachineTemplateObjectMetaCopy.Annotations)) + }) } func TestKubeadmControlPlaneReconciler_generateKubeadmConfig(t *testing.T) { diff --git a/controlplane/kubeadm/internal/controllers/scale_test.go b/controlplane/kubeadm/internal/controllers/scale_test.go index 873153bffdf5..1cfdbc96f3db 100644 --- a/controlplane/kubeadm/internal/controllers/scale_test.go +++ b/controlplane/kubeadm/internal/controllers/scale_test.go @@ -23,7 +23,9 @@ import ( "time" . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -32,22 +34,42 @@ import ( bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal" + "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/collections" "sigs.k8s.io/cluster-api/util/conditions" ) func TestKubeadmControlPlaneReconciler_initializeControlPlane(t *testing.T) { - g := NewWithT(t) + setup := func(t *testing.T, g *WithT) *corev1.Namespace { + t.Helper() + + t.Log("Creating the namespace") + ns, err := env.CreateNamespace(ctx, "test-kcp-reconciler-initializecontrolplane") + g.Expect(err).To(BeNil()) + + return ns + } + + teardown := func(t *testing.T, g *WithT, ns *corev1.Namespace) { + t.Helper() + + t.Log("Deleting the namespace") + g.Expect(env.Delete(ctx, ns)).To(Succeed()) + } - cluster, kcp, genericMachineTemplate := createClusterWithControlPlane(metav1.NamespaceDefault) + g := NewWithT(t) + namespace := setup(t, g) + defer teardown(t, g, namespace) - fakeClient := newFakeClient(cluster.DeepCopy(), kcp.DeepCopy(), genericMachineTemplate.DeepCopy()) + cluster, kcp, genericInfrastructureMachineTemplate := createClusterWithControlPlane(namespace.Name) + g.Expect(env.Create(ctx, genericInfrastructureMachineTemplate, client.FieldOwner("manager"))).To(Succeed()) + kcp.UID = types.UID(util.RandomString(10)) r := &KubeadmControlPlaneReconciler{ - Client: fakeClient, + Client: env, recorder: record.NewFakeRecorder(32), managementClusterUncached: &fakeManagementCluster{ - Management: &internal.Management{Client: fakeClient}, + Management: &internal.Management{Client: env}, Workload: fakeWorkloadCluster{}, }, } @@ -61,10 +83,10 @@ func TestKubeadmControlPlaneReconciler_initializeControlPlane(t *testing.T) { g.Expect(err).NotTo(HaveOccurred()) machineList := &clusterv1.MachineList{} - g.Expect(fakeClient.List(ctx, machineList, client.InNamespace(cluster.Namespace))).To(Succeed()) + g.Expect(env.GetAPIReader().List(ctx, machineList, client.InNamespace(cluster.Namespace))).To(Succeed()) g.Expect(machineList.Items).To(HaveLen(1)) - res, err := collections.GetFilteredMachinesForCluster(ctx, fakeClient, cluster, collections.OwnedMachines(kcp)) + res, err := collections.GetFilteredMachinesForCluster(ctx, env, cluster, collections.OwnedMachines(kcp)) g.Expect(res).To(HaveLen(1)) g.Expect(err).NotTo(HaveOccurred()) @@ -72,9 +94,9 @@ func TestKubeadmControlPlaneReconciler_initializeControlPlane(t *testing.T) { g.Expect(machineList.Items[0].Name).To(HavePrefix(kcp.Name)) g.Expect(machineList.Items[0].Spec.InfrastructureRef.Namespace).To(Equal(cluster.Namespace)) - 
g.Expect(machineList.Items[0].Spec.InfrastructureRef.Name).To(HavePrefix(genericMachineTemplate.GetName())) - g.Expect(machineList.Items[0].Spec.InfrastructureRef.APIVersion).To(Equal(genericMachineTemplate.GetAPIVersion())) - g.Expect(machineList.Items[0].Spec.InfrastructureRef.Kind).To(Equal("GenericMachine")) + g.Expect(machineList.Items[0].Spec.InfrastructureRef.Name).To(HavePrefix(genericInfrastructureMachineTemplate.GetName())) + g.Expect(machineList.Items[0].Spec.InfrastructureRef.APIVersion).To(Equal(genericInfrastructureMachineTemplate.GetAPIVersion())) + g.Expect(machineList.Items[0].Spec.InfrastructureRef.Kind).To(Equal("GenericInfrastructureMachine")) g.Expect(machineList.Items[0].Spec.Bootstrap.ConfigRef.Namespace).To(Equal(cluster.Namespace)) g.Expect(machineList.Items[0].Spec.Bootstrap.ConfigRef.Name).To(HavePrefix(kcp.Name)) @@ -84,11 +106,31 @@ func TestKubeadmControlPlaneReconciler_initializeControlPlane(t *testing.T) { func TestKubeadmControlPlaneReconciler_scaleUpControlPlane(t *testing.T) { t.Run("creates a control plane Machine if preflight checks pass", func(t *testing.T) { + setup := func(t *testing.T, g *WithT) *corev1.Namespace { + t.Helper() + + t.Log("Creating the namespace") + ns, err := env.CreateNamespace(ctx, "test-kcp-reconciler-scaleupcontrolplane") + g.Expect(err).To(BeNil()) + + return ns + } + + teardown := func(t *testing.T, g *WithT, ns *corev1.Namespace) { + t.Helper() + + t.Log("Deleting the namespace") + g.Expect(env.Delete(ctx, ns)).To(Succeed()) + } + g := NewWithT(t) + namespace := setup(t, g) + defer teardown(t, g, namespace) - cluster, kcp, genericMachineTemplate := createClusterWithControlPlane(metav1.NamespaceDefault) + cluster, kcp, genericInfrastructureMachineTemplate := createClusterWithControlPlane(namespace.Name) + g.Expect(env.Create(ctx, genericInfrastructureMachineTemplate, client.FieldOwner("manager"))).To(Succeed()) + kcp.UID = types.UID(util.RandomString(10)) setKCPHealthy(kcp) - initObjs := []client.Object{cluster.DeepCopy(), kcp.DeepCopy(), genericMachineTemplate.DeepCopy()} fmc := &fakeManagementCluster{ Machines: collections.New(), @@ -99,13 +141,10 @@ func TestKubeadmControlPlaneReconciler_scaleUpControlPlane(t *testing.T) { m, _ := createMachineNodePair(fmt.Sprintf("test-%d", i), cluster, kcp, true) setMachineHealthy(m) fmc.Machines.Insert(m) - initObjs = append(initObjs, m.DeepCopy()) } - fakeClient := newFakeClient(initObjs...) - r := &KubeadmControlPlaneReconciler{ - Client: fakeClient, + Client: env, managementCluster: fmc, managementClusterUncached: fmc, recorder: record.NewFakeRecorder(32), @@ -121,12 +160,38 @@ func TestKubeadmControlPlaneReconciler_scaleUpControlPlane(t *testing.T) { g.Expect(err).ToNot(HaveOccurred()) controlPlaneMachines := clusterv1.MachineList{} - g.Expect(fakeClient.List(ctx, &controlPlaneMachines)).To(Succeed()) - g.Expect(controlPlaneMachines.Items).To(HaveLen(3)) + g.Expect(env.GetAPIReader().List(ctx, &controlPlaneMachines, client.InNamespace(namespace.Name))).To(Succeed()) + // A new machine should have been created. + // Note: expected length is 1 because only the newly created machine is on API server. Other machines are + // in-memory only during the test. 
+ g.Expect(controlPlaneMachines.Items).To(HaveLen(1)) }) t.Run("does not create a control plane Machine if preflight checks fail", func(t *testing.T) { - cluster, kcp, genericMachineTemplate := createClusterWithControlPlane(metav1.NamespaceDefault) - initObjs := []client.Object{fakeGenericMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), genericMachineTemplate.DeepCopy()} + setup := func(t *testing.T, g *WithT) *corev1.Namespace { + t.Helper() + + t.Log("Creating the namespace") + ns, err := env.CreateNamespace(ctx, "test-kcp-reconciler-scaleupcontrolplane") + g.Expect(err).To(BeNil()) + + return ns + } + + teardown := func(t *testing.T, g *WithT, ns *corev1.Namespace) { + t.Helper() + + t.Log("Deleting the namespace") + g.Expect(env.Delete(ctx, ns)).To(Succeed()) + } + + g := NewWithT(t) + namespace := setup(t, g) + defer teardown(t, g, namespace) + + cluster, kcp, genericInfrastructureMachineTemplate := createClusterWithControlPlane(namespace.Name) + g.Expect(env.Create(ctx, genericInfrastructureMachineTemplate, client.FieldOwner("manager"))).To(Succeed()) + kcp.UID = types.UID(util.RandomString(10)) + cluster.UID = types.UID(util.RandomString(10)) cluster.Spec.ControlPlaneEndpoint.Host = "nodomain.example.com" cluster.Spec.ControlPlaneEndpoint.Port = 6443 cluster.Status.InfrastructureReady = true @@ -135,23 +200,20 @@ func TestKubeadmControlPlaneReconciler_scaleUpControlPlane(t *testing.T) { for i := 0; i < 2; i++ { m, _ := createMachineNodePair(fmt.Sprintf("test-%d", i), cluster.DeepCopy(), kcp.DeepCopy(), true) beforeMachines.Insert(m) - initObjs = append(initObjs, m.DeepCopy()) } - g := NewWithT(t) - - fakeClient := newFakeClient(initObjs...) fmc := &fakeManagementCluster{ Machines: beforeMachines.DeepCopy(), Workload: fakeWorkloadCluster{}, } r := &KubeadmControlPlaneReconciler{ - Client: fakeClient, - APIReader: fakeClient, + Client: env, + APIReader: env.GetAPIReader(), managementCluster: fmc, managementClusterUncached: fmc, recorder: record.NewFakeRecorder(32), + disableInPlacePropagation: true, } result, err := r.reconcile(context.Background(), cluster, kcp) @@ -160,13 +222,15 @@ func TestKubeadmControlPlaneReconciler_scaleUpControlPlane(t *testing.T) { // scaleUpControlPlane is never called due to health check failure and new machine is not created to scale up. controlPlaneMachines := &clusterv1.MachineList{} - g.Expect(fakeClient.List(context.Background(), controlPlaneMachines)).To(Succeed()) - g.Expect(controlPlaneMachines.Items).To(HaveLen(len(beforeMachines))) + g.Expect(env.GetAPIReader().List(context.Background(), controlPlaneMachines, client.InNamespace(namespace.Name))).To(Succeed()) + // No new machine should be created. + // Note: expected length is 0 because no machine is created and hence no machine is on the API server. + // Other machines are in-memory only during the test. 
+ g.Expect(controlPlaneMachines.Items).To(HaveLen(0)) endMachines := collections.FromMachineList(controlPlaneMachines) for _, m := range endMachines { bm, ok := beforeMachines[m.Name] - bm.SetResourceVersion("999") g.Expect(ok).To(BeTrue()) g.Expect(m).To(Equal(bm)) } diff --git a/controlplane/kubeadm/internal/controllers/suite_test.go b/controlplane/kubeadm/internal/controllers/suite_test.go index 47da5c51b0fc..865f39ef5fcc 100644 --- a/controlplane/kubeadm/internal/controllers/suite_test.go +++ b/controlplane/kubeadm/internal/controllers/suite_test.go @@ -20,9 +20,6 @@ import ( "os" "testing" - // +kubebuilder:scaffold:imports - apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/cluster-api/internal/test/envtest" @@ -31,25 +28,6 @@ import ( var ( env *envtest.Environment ctx = ctrl.SetupSignalHandler() - // TODO(sbueringer): move under internal/builder (or refactor it in a way that we don't need it anymore). - fakeGenericMachineTemplateCRD = &apiextensionsv1.CustomResourceDefinition{ - TypeMeta: metav1.TypeMeta{ - APIVersion: apiextensionsv1.SchemeGroupVersion.String(), - Kind: "CustomResourceDefinition", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: "genericmachinetemplates.generic.io", - Labels: map[string]string{ - "cluster.x-k8s.io/v1beta1": "v1", - }, - }, - Spec: apiextensionsv1.CustomResourceDefinitionSpec{ - Group: "generic.io", - Names: apiextensionsv1.CustomResourceDefinitionNames{ - Kind: "GenericMachineTemplate", - }, - }, - } ) func TestMain(m *testing.M) { diff --git a/controlplane/kubeadm/internal/controllers/upgrade_test.go b/controlplane/kubeadm/internal/controllers/upgrade_test.go index 50fdb35a09a0..75f26fe3d860 100644 --- a/controlplane/kubeadm/internal/controllers/upgrade_test.go +++ b/controlplane/kubeadm/internal/controllers/upgrade_test.go @@ -20,10 +20,12 @@ import ( "context" "fmt" "testing" + "time" . 
"github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" "k8s.io/utils/pointer" ctrl "sigs.k8s.io/controller-runtime" @@ -32,6 +34,8 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal" + "sigs.k8s.io/cluster-api/internal/test/builder" + "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/collections" ) @@ -39,34 +43,57 @@ const UpdatedVersion string = "v1.17.4" const Host string = "nodomain.example.com" func TestKubeadmControlPlaneReconciler_RolloutStrategy_ScaleUp(t *testing.T) { + setup := func(t *testing.T, g *WithT) *corev1.Namespace { + t.Helper() + + t.Log("Creating the namespace") + ns, err := env.CreateNamespace(ctx, "test-kcp-reconciler-rollout-scaleup") + g.Expect(err).To(BeNil()) + + return ns + } + + teardown := func(t *testing.T, g *WithT, ns *corev1.Namespace) { + t.Helper() + + t.Log("Deleting the namespace") + g.Expect(env.Delete(ctx, ns)).To(Succeed()) + } + g := NewWithT(t) + namespace := setup(t, g) + defer teardown(t, g, namespace) - cluster, kcp, genericMachineTemplate := createClusterWithControlPlane(metav1.NamespaceDefault) + timeout := 30 * time.Second + + cluster, kcp, genericInfrastructureMachineTemplate := createClusterWithControlPlane(namespace.Name) + g.Expect(env.Create(ctx, genericInfrastructureMachineTemplate, client.FieldOwner("manager"))).To(Succeed()) + cluster.UID = types.UID(util.RandomString(10)) cluster.Spec.ControlPlaneEndpoint.Host = Host cluster.Spec.ControlPlaneEndpoint.Port = 6443 cluster.Status.InfrastructureReady = true + kcp.UID = types.UID(util.RandomString(10)) kcp.Spec.KubeadmConfigSpec.ClusterConfiguration = nil kcp.Spec.Replicas = pointer.Int32(1) setKCPHealthy(kcp) - fakeClient := newFakeClient(fakeGenericMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), genericMachineTemplate.DeepCopy()) - r := &KubeadmControlPlaneReconciler{ - Client: fakeClient, - APIReader: fakeClient, + Client: env, + APIReader: env.GetAPIReader(), recorder: record.NewFakeRecorder(32), managementCluster: &fakeManagementCluster{ - Management: &internal.Management{Client: fakeClient}, + Management: &internal.Management{Client: env}, Workload: fakeWorkloadCluster{ Status: internal.ClusterStatus{Nodes: 1}, }, }, managementClusterUncached: &fakeManagementCluster{ - Management: &internal.Management{Client: fakeClient}, + Management: &internal.Management{Client: env}, Workload: fakeWorkloadCluster{ Status: internal.ClusterStatus{Nodes: 1}, }, }, + disableInPlacePropagation: true, } controlPlane := &internal.ControlPlane{ KCP: kcp, @@ -80,8 +107,12 @@ func TestKubeadmControlPlaneReconciler_RolloutStrategy_ScaleUp(t *testing.T) { // initial setup initialMachine := &clusterv1.MachineList{} - g.Expect(fakeClient.List(ctx, initialMachine, client.InNamespace(cluster.Namespace))).To(Succeed()) - g.Expect(initialMachine.Items).To(HaveLen(1)) + g.Eventually(func(g Gomega) { + // Nb. This Eventually block also forces the cache to update so that subsequent + // reconcile and upgradeControlPlane calls use the updated cache and avoids flakiness in the test. 
+ g.Expect(env.List(ctx, initialMachine, client.InNamespace(cluster.Namespace))).To(Succeed()) + g.Expect(initialMachine.Items).To(HaveLen(1)) + }, timeout).Should(Succeed()) for i := range initialMachine.Items { setMachineHealthy(&initialMachine.Items[i]) } @@ -96,8 +127,10 @@ func TestKubeadmControlPlaneReconciler_RolloutStrategy_ScaleUp(t *testing.T) { g.Expect(result).To(Equal(ctrl.Result{Requeue: true})) g.Expect(err).To(BeNil()) bothMachines := &clusterv1.MachineList{} - g.Expect(fakeClient.List(ctx, bothMachines, client.InNamespace(cluster.Namespace))).To(Succeed()) - g.Expect(bothMachines.Items).To(HaveLen(2)) + g.Eventually(func(g Gomega) { + g.Expect(env.List(ctx, bothMachines, client.InNamespace(cluster.Namespace))).To(Succeed()) + g.Expect(bothMachines.Items).To(HaveLen(2)) + }, timeout).Should(Succeed()) // run upgrade a second time, simulate that the node has not appeared yet but the machine exists @@ -105,8 +138,10 @@ func TestKubeadmControlPlaneReconciler_RolloutStrategy_ScaleUp(t *testing.T) { result, err = r.reconcile(context.Background(), cluster, kcp) g.Expect(err).ToNot(HaveOccurred()) g.Expect(result).To(Equal(ctrl.Result{RequeueAfter: preflightFailedRequeueAfter})) - g.Expect(fakeClient.List(context.Background(), bothMachines, client.InNamespace(cluster.Namespace))).To(Succeed()) - g.Expect(bothMachines.Items).To(HaveLen(2)) + g.Eventually(func(g Gomega) { + g.Expect(env.List(context.Background(), bothMachines, client.InNamespace(cluster.Namespace))).To(Succeed()) + g.Expect(bothMachines.Items).To(HaveLen(2)) + }, timeout).Should(Succeed()) // manually increase number of nodes, make control plane healthy again r.managementCluster.(*fakeManagementCluster).Workload.Status.Nodes++ @@ -115,17 +150,24 @@ func TestKubeadmControlPlaneReconciler_RolloutStrategy_ScaleUp(t *testing.T) { } controlPlane.Machines = collections.FromMachineList(bothMachines) + machinesRequireUpgrade := collections.Machines{} + for i := range bothMachines.Items { + if bothMachines.Items[i].Spec.Version != nil && *bothMachines.Items[i].Spec.Version != UpdatedVersion { + machinesRequireUpgrade[bothMachines.Items[i].Name] = &bothMachines.Items[i] + } + } + // run upgrade the second time, expect we scale down - result, err = r.upgradeControlPlane(ctx, cluster, kcp, controlPlane, controlPlane.Machines) + result, err = r.upgradeControlPlane(ctx, cluster, kcp, controlPlane, machinesRequireUpgrade) g.Expect(err).To(BeNil()) g.Expect(result).To(Equal(ctrl.Result{Requeue: true})) finalMachine := &clusterv1.MachineList{} - g.Expect(fakeClient.List(ctx, finalMachine, client.InNamespace(cluster.Namespace))).To(Succeed()) - g.Expect(finalMachine.Items).To(HaveLen(1)) - - // assert that the deleted machine is the oldest, initial machine - g.Expect(finalMachine.Items[0].Name).ToNot(Equal(initialMachine.Items[0].Name)) - g.Expect(finalMachine.Items[0].CreationTimestamp.Time).To(BeTemporally(">", initialMachine.Items[0].CreationTimestamp.Time)) + g.Eventually(func(g Gomega) { + g.Expect(env.List(ctx, finalMachine, client.InNamespace(cluster.Namespace))).To(Succeed()) + g.Expect(finalMachine.Items).To(HaveLen(1)) + // assert that the deleted machine is the initial machine + g.Expect(finalMachine.Items[0].Name).ToNot(Equal(initialMachine.Items[0].Name)) + }, timeout).Should(Succeed()) } func TestKubeadmControlPlaneReconciler_RolloutStrategy_ScaleDown(t *testing.T) { @@ -145,7 +187,7 @@ func TestKubeadmControlPlaneReconciler_RolloutStrategy_ScaleDown(t *testing.T) { Status: internal.ClusterStatus{Nodes: 3}, }, } - objs 
:= []client.Object{fakeGenericMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()} + objs := []client.Object{builder.GenericInfrastructureMachineTemplateCRD, cluster.DeepCopy(), kcp.DeepCopy(), tmpl.DeepCopy()} for i := 0; i < 3; i++ { name := fmt.Sprintf("test-%d", i) m := &clusterv1.Machine{ diff --git a/controlplane/kubeadm/internal/filters.go b/controlplane/kubeadm/internal/filters.go index b1da909126e5..5dd8a9e016c1 100644 --- a/controlplane/kubeadm/internal/filters.go +++ b/controlplane/kubeadm/internal/filters.go @@ -22,7 +22,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "sigs.k8s.io/controller-runtime/pkg/client" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" @@ -32,11 +31,13 @@ import ( // MatchesMachineSpec returns a filter to find all machines that matches with KCP config and do not require any rollout. // Kubernetes version, infrastructure template, and KubeadmConfig field need to be equivalent. +// Note: We don't need to compare the entire MachineSpec to determine if a Machine needs to be rolled out, +// because all the fields in the MachineSpec, except for version, the infrastructureRef and bootstrap.ConfigRef, are either: +// - mutated in-place (ex: NodeDrainTimeout) +// - are not dictated by KCP (ex: ProviderID) +// - are not relevant for the rollout decision (ex: failureDomain). func MatchesMachineSpec(infraConfigs map[string]*unstructured.Unstructured, machineConfigs map[string]*bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane) func(machine *clusterv1.Machine) bool { return collections.And( - func(machine *clusterv1.Machine) bool { - return matchMachineTemplateMetadata(kcp, machine) - }, collections.MatchesKubernetesVersion(kcp.Spec.Version), MatchesKubeadmBootstrapConfig(machineConfigs, kcp), MatchesTemplateClonedFrom(infraConfigs, kcp), @@ -55,7 +56,11 @@ func NeedsRollout(reconciliationTime, rolloutAfter *metav1.Time, rolloutBefore * ) } -// MatchesTemplateClonedFrom returns a filter to find all machines that match a given KCP infra template. +// MatchesTemplateClonedFrom returns a filter to find all machines that have a corresponding infrastructure machine that +// matches a given KCP infra template. +// Note: Differences to the labels and annotations on the infrastructure machine are not considered for matching +// criteria, because changes to labels and annotations are propagated in-place to the infrastructure machines. +// TODO: This function will be renamed in a follow-up PR to something better. (ex: MatchesInfraMachine). func MatchesTemplateClonedFrom(infraConfigs map[string]*unstructured.Unstructured, kcp *controlplanev1.KubeadmControlPlane) collections.Func { return func(machine *clusterv1.Machine) bool { if machine == nil { @@ -82,15 +87,13 @@ func MatchesTemplateClonedFrom(infraConfigs map[string]*unstructured.Unstructure return false } - // Check if the machine template metadata matches with the infrastructure object. - if !matchMachineTemplateMetadata(kcp, infraObj) { - return false - } return true } } // MatchesKubeadmBootstrapConfig checks if machine's KubeadmConfigSpec is equivalent with KCP's KubeadmConfigSpec. +// Note: Differences to the labels and annotations on the KubeadmConfig are not considered for matching +// criteria, because changes to labels and annotations are propagated in-place to KubeadmConfig. 
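// For example (an illustrative sketch, not an exhaustive list):
//
//	// A KubeadmConfigSpec change is detected via the ClusterConfiguration
//	// annotation comparison and triggers a rollout:
//	kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.ImageRepository = "registry.example.com"
//
//	// A metadata change is propagated in-place and does not trigger a rollout:
//	kcp.Spec.MachineTemplate.ObjectMeta.Labels["environment"] = "production"
//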
func MatchesKubeadmBootstrapConfig(machineConfigs map[string]*bootstrapv1.KubeadmConfig, kcp *controlplanev1.KubeadmControlPlane) collections.Func { return func(machine *clusterv1.Machine) bool { if machine == nil { @@ -116,11 +119,6 @@ func MatchesKubeadmBootstrapConfig(machineConfigs map[string]*bootstrapv1.Kubead return true } - // Check if the machine template metadata matches with the infrastructure object. - if !matchMachineTemplateMetadata(kcp, machineConfig) { - return false - } - // Check if KCP and machine InitConfiguration or JoinConfiguration matches // NOTE: only one between init configuration and join configuration is set on a machine, depending // on the fact that the machine was the initial control plane node or a joining control plane node. @@ -255,30 +253,3 @@ func cleanupConfigFields(kcpConfig *bootstrapv1.KubeadmConfigSpec, machineConfig machineConfig.Spec.JoinConfiguration.TypeMeta = kcpConfig.JoinConfiguration.TypeMeta } } - -// matchMachineTemplateMetadata matches the machine template object meta information, -// specifically annotations and labels, against an object. -func matchMachineTemplateMetadata(kcp *controlplanev1.KubeadmControlPlane, obj client.Object) bool { - // Check if annotations and labels match. - if !isSubsetMapOf(kcp.Spec.MachineTemplate.ObjectMeta.Annotations, obj.GetAnnotations()) { - return false - } - if !isSubsetMapOf(kcp.Spec.MachineTemplate.ObjectMeta.Labels, obj.GetLabels()) { - return false - } - return true -} - -func isSubsetMapOf(base map[string]string, existing map[string]string) bool { -loopBase: - for key, value := range base { - for existingKey, existingValue := range existing { - if existingKey == key && existingValue == value { - continue loopBase - } - } - // Return false right away if a key value pair wasn't found. 
- return false - } - return true -} diff --git a/controlplane/kubeadm/internal/filters_test.go b/controlplane/kubeadm/internal/filters_test.go index f6bb921d7c23..79211ccd3ee9 100644 --- a/controlplane/kubeadm/internal/filters_test.go +++ b/controlplane/kubeadm/internal/filters_test.go @@ -937,28 +937,28 @@ func TestMatchesKubeadmBootstrapConfig(t *testing.T) { }, } - t.Run("by returning false if neither labels or annotations match", func(t *testing.T) { + t.Run("by returning true if neither labels or annotations match", func(t *testing.T) { g := NewWithT(t) machineConfigs[m.Name].Annotations = nil machineConfigs[m.Name].Labels = nil f := MatchesKubeadmBootstrapConfig(machineConfigs, kcp) - g.Expect(f(m)).To(BeFalse()) + g.Expect(f(m)).To(BeTrue()) }) - t.Run("by returning false if only labels don't match", func(t *testing.T) { + t.Run("by returning true if only labels don't match", func(t *testing.T) { g := NewWithT(t) machineConfigs[m.Name].Annotations = kcp.Spec.MachineTemplate.ObjectMeta.Annotations machineConfigs[m.Name].Labels = nil f := MatchesKubeadmBootstrapConfig(machineConfigs, kcp) - g.Expect(f(m)).To(BeFalse()) + g.Expect(f(m)).To(BeTrue()) }) - t.Run("by returning false if only annotations don't match", func(t *testing.T) { + t.Run("by returning true if only annotations don't match", func(t *testing.T) { g := NewWithT(t) machineConfigs[m.Name].Annotations = nil machineConfigs[m.Name].Labels = kcp.Spec.MachineTemplate.ObjectMeta.Labels f := MatchesKubeadmBootstrapConfig(machineConfigs, kcp) - g.Expect(f(m)).To(BeFalse()) + g.Expect(f(m)).To(BeTrue()) }) t.Run("by returning true if both labels and annotations match", func(t *testing.T) { @@ -1045,7 +1045,7 @@ func TestMatchesTemplateClonedFrom(t *testing.T) { }, } - t.Run("by returning false if neither labels or annotations match", func(t *testing.T) { + t.Run("by returning true if neither labels or annotations match", func(t *testing.T) { g := NewWithT(t) infraConfigs[m.Name].SetAnnotations(map[string]string{ clusterv1.TemplateClonedFromNameAnnotation: "infra-foo", @@ -1053,10 +1053,10 @@ func TestMatchesTemplateClonedFrom(t *testing.T) { }) infraConfigs[m.Name].SetLabels(nil) f := MatchesTemplateClonedFrom(infraConfigs, kcp) - g.Expect(f(m)).To(BeFalse()) + g.Expect(f(m)).To(BeTrue()) }) - t.Run("by returning false if only labels don't match", func(t *testing.T) { + t.Run("by returning true if only labels don't match", func(t *testing.T) { g := NewWithT(t) infraConfigs[m.Name].SetAnnotations(map[string]string{ clusterv1.TemplateClonedFromNameAnnotation: "infra-foo", @@ -1065,10 +1065,10 @@ func TestMatchesTemplateClonedFrom(t *testing.T) { }) infraConfigs[m.Name].SetLabels(nil) f := MatchesTemplateClonedFrom(infraConfigs, kcp) - g.Expect(f(m)).To(BeFalse()) + g.Expect(f(m)).To(BeTrue()) }) - t.Run("by returning false if only annotations don't match", func(t *testing.T) { + t.Run("by returning true if only annotations don't match", func(t *testing.T) { g := NewWithT(t) infraConfigs[m.Name].SetAnnotations(map[string]string{ clusterv1.TemplateClonedFromNameAnnotation: "infra-foo", @@ -1076,7 +1076,7 @@ func TestMatchesTemplateClonedFrom(t *testing.T) { }) infraConfigs[m.Name].SetLabels(kcp.Spec.MachineTemplate.ObjectMeta.Labels) f := MatchesTemplateClonedFrom(infraConfigs, kcp) - g.Expect(f(m)).To(BeFalse()) + g.Expect(f(m)).To(BeTrue()) }) t.Run("by returning true if both labels and annotations match", func(t *testing.T) { diff --git a/internal/controllers/machinedeployment/machinedeployment_controller.go 
b/internal/controllers/machinedeployment/machinedeployment_controller.go index ca0358eb1014..27a21b156cda 100644 --- a/internal/controllers/machinedeployment/machinedeployment_controller.go +++ b/internal/controllers/machinedeployment/machinedeployment_controller.go @@ -261,7 +261,7 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster, // Cluster API releases. If we do this only for selected MachineSets, we would have to keep this code forever. for idx := range msList { machineSet := msList[idx] - if err := ssa.CleanUpManagedFieldsForSSAAdoption(ctx, machineSet, machineDeploymentManagerName, r.Client); err != nil { + if err := ssa.CleanUpManagedFieldsForSSAAdoption(ctx, r.Client, machineSet, machineDeploymentManagerName); err != nil { return ctrl.Result{}, errors.Wrapf(err, "failed to clean up managedFields of MachineSet %s", klog.KObj(machineSet)) } } diff --git a/internal/controllers/machineset/machineset_controller.go b/internal/controllers/machineset/machineset_controller.go index 36646a9e2ac1..f865ab7b3417 100644 --- a/internal/controllers/machineset/machineset_controller.go +++ b/internal/controllers/machineset/machineset_controller.go @@ -387,7 +387,7 @@ func (r *Reconciler) syncMachines(ctx context.Context, machineSet *clusterv1.Mac // We do this so that Machines that were created/patched before the controller adopted Server-Side-Apply (SSA) // (< v1.4.0) can also work with SSA. Otherwise, fields would be co-owned by our "old" "manager" and // "capi-machineset" and then we would not be able to e.g. drop labels and annotations. - if err := ssa.CleanUpManagedFieldsForSSAAdoption(ctx, m, machineSetManagerName, r.Client); err != nil { + if err := ssa.CleanUpManagedFieldsForSSAAdoption(ctx, r.Client, m, machineSetManagerName); err != nil { return errors.Wrapf(err, "failed to update machine: failed to adjust the managedFields of the Machine %q", m.Name) } diff --git a/internal/util/ssa/managedfields.go b/internal/util/ssa/managedfields.go index e56275468e5a..feb211991888 100644 --- a/internal/util/ssa/managedfields.go +++ b/internal/util/ssa/managedfields.go @@ -109,7 +109,7 @@ func DropManagedFields(ctx context.Context, c client.Client, obj client.Object, // We won't do this on subsequent reconciles. This case will be identified by checking if `ssaManager` owns any fields. // Dropping all existing "manager" entries (which could also be from other controllers) is safe, as we assume that if // other controllers are still writing fields on the object they will just do it again and thus gain ownership again. -func CleanUpManagedFieldsForSSAAdoption(ctx context.Context, obj client.Object, ssaManager string, c client.Client) error { +func CleanUpManagedFieldsForSSAAdoption(ctx context.Context, c client.Client, obj client.Object, ssaManager string) error { // Return if `ssaManager` already owns any fields. 
if hasFieldsManagedBy(obj, ssaManager) { return nil diff --git a/internal/util/ssa/managedfields_test.go b/internal/util/ssa/managedfields_test.go index 950895a44125..0b4f432867d8 100644 --- a/internal/util/ssa/managedfields_test.go +++ b/internal/util/ssa/managedfields_test.go @@ -251,7 +251,7 @@ func TestCleanUpManagedFieldsForSSAAdoption(t *testing.T) { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) fakeClient := fake.NewClientBuilder().WithObjects(tt.obj).Build() - g.Expect(CleanUpManagedFieldsForSSAAdoption(ctx, tt.obj, ssaManager, fakeClient)).Should(Succeed()) + g.Expect(CleanUpManagedFieldsForSSAAdoption(ctx, fakeClient, tt.obj, ssaManager)).Should(Succeed()) g.Expect(tt.obj.GetManagedFields()).Should( ContainElement(MatchManagedFieldsEntry(ssaManager, metav1.ManagedFieldsOperationApply))) if tt.wantEntryForClassicManager { From 68cd99acf79a7a70130c2dcf0267b157f78760a0 Mon Sep 17 00:00:00 2001 From: Stefan Bueringer Date: Tue, 28 Feb 2023 19:07:49 +0100 Subject: [PATCH 2/3] SSA: Implement request caching --- .../controllers/machine/machine_controller.go | 7 ++ .../machine/machine_controller_noderef.go | 12 +- .../machine/machine_controller_test.go | 11 +- .../machinedeployment_controller.go | 2 + .../machinedeployment_sync.go | 18 ++- .../machinedeployment/mdutil/util.go | 30 ----- .../machineset/machineset_controller.go | 22 ++-- .../machineset/machineset_controller_test.go | 2 +- .../topology/cluster/cluster_controller.go | 7 +- .../topology/cluster/reconcile_state_test.go | 27 ++-- .../cluster/structuredmerge/dryrun.go | 25 +++- .../structuredmerge/serversidepathhelper.go | 3 +- .../serversidepathhelper_test.go | 112 +++++++++++++---- internal/util/hash/hash.go | 45 +++++++ internal/util/ssa/cache.go | 116 ++++++++++++++++++ internal/util/ssa/patch.go | 102 +++++++++++++++ internal/util/ssa/patch_test.go | 82 +++++++++++++ internal/util/ssa/suite_test.go | 53 ++++++++ 18 files changed, 564 insertions(+), 112 deletions(-) create mode 100644 internal/util/hash/hash.go create mode 100644 internal/util/ssa/cache.go create mode 100644 internal/util/ssa/patch.go create mode 100644 internal/util/ssa/patch_test.go create mode 100644 internal/util/ssa/suite_test.go diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go index 743a3957b224..37a8fe56a0ad 100644 --- a/internal/controllers/machine/machine_controller.go +++ b/internal/controllers/machine/machine_controller.go @@ -46,6 +46,7 @@ import ( "sigs.k8s.io/cluster-api/controllers/external" "sigs.k8s.io/cluster-api/controllers/noderefutil" "sigs.k8s.io/cluster-api/controllers/remote" + "sigs.k8s.io/cluster-api/internal/util/ssa" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/collections" @@ -58,6 +59,10 @@ import ( const ( // controllerName defines the controller used when creating clients. controllerName = "machine-controller" + + // machineManagerName is the manager name used for Server-Side-Apply (SSA) operations + // in the Machine controller. + machineManagerName = "capi-machine" ) var ( @@ -91,6 +96,7 @@ type Reconciler struct { // nodeDeletionRetryTimeout determines how long the controller will retry deleting a node // during a single reconciliation. 
nodeDeletionRetryTimeout time.Duration + ssaCache ssa.Cache } func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { @@ -134,6 +140,7 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt r.externalTracker = external.ObjectTracker{ Controller: controller, } + r.ssaCache = ssa.NewCache(mgr.GetScheme()) return nil } diff --git a/internal/controllers/machine/machine_controller_noderef.go b/internal/controllers/machine/machine_controller_noderef.go index d2664aca9133..b52a81dee8fe 100644 --- a/internal/controllers/machine/machine_controller_noderef.go +++ b/internal/controllers/machine/machine_controller_noderef.go @@ -33,6 +33,7 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/api/v1beta1/index" "sigs.k8s.io/cluster-api/controllers/noderefutil" + "sigs.k8s.io/cluster-api/internal/util/ssa" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/conditions" @@ -124,13 +125,10 @@ func (r *Reconciler) reconcileNode(ctx context.Context, cluster *clusterv1.Clust } } - options := []client.PatchOption{ - client.FieldOwner("capi-machine"), - client.ForceOwnership, - } - nodePatch := unstructuredNode(node.Name, node.UID, getManagedLabels(machine.Labels)) - if err := remoteClient.Patch(ctx, nodePatch, client.Apply, options...); err != nil { - return ctrl.Result{}, errors.Wrap(err, "failed to apply patch label to the node") + updatedNode := unstructuredNode(node.Name, node.UID, getManagedLabels(machine.Labels)) + _, err = ssa.Patch(ctx, remoteClient, machineManagerName, updatedNode, ssa.WithCachingProxy{Cache: r.ssaCache, Original: node}) + if err != nil { + return ctrl.Result{}, errors.Wrap(err, "failed to apply labels to Node") } // Do the remaining node health checks, then set the node health to true if all checks pass. 
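The request-caching pattern used above boils down to the sketch below. The signatures of ssa.NewCache, ssa.Patch and ssa.WithCachingProxy are inferred from the call sites in this patch, the reconciler and manager names are placeholders, and imports of context, controller-runtime (ctrl, client) and the internal ssa package are assumed, so treat it as illustrative rather than authoritative.

// exampleReconciler shows how the SSA cache is wired into a reconciler.
type exampleReconciler struct {
	Client   client.Client
	ssaCache ssa.Cache
}

// newExampleReconciler initializes the cache from the manager's scheme,
// mirroring the SetupWithManager changes in this patch.
func newExampleReconciler(mgr ctrl.Manager) *exampleReconciler {
	return &exampleReconciler{
		Client:   mgr.GetClient(),
		ssaCache: ssa.NewCache(mgr.GetScheme()),
	}
}

// apply performs a Server-Side-Apply of the desired object; WithCachingProxy
// lets repeated reconciles skip identical apply requests against the API server.
func (r *exampleReconciler) apply(ctx context.Context, desired, original client.Object) (client.Object, error) {
	return ssa.Patch(ctx, r.Client, "example-manager", desired, ssa.WithCachingProxy{Cache: r.ssaCache, Original: original})
}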
diff --git a/internal/controllers/machine/machine_controller_test.go b/internal/controllers/machine/machine_controller_test.go index dd448a1a8363..42ca4a7bcf21 100644 --- a/internal/controllers/machine/machine_controller_test.go +++ b/internal/controllers/machine/machine_controller_test.go @@ -42,6 +42,7 @@ import ( "sigs.k8s.io/cluster-api/api/v1beta1/index" "sigs.k8s.io/cluster-api/controllers/remote" "sigs.k8s.io/cluster-api/internal/test/builder" + "sigs.k8s.io/cluster-api/internal/util/ssa" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" @@ -726,8 +727,9 @@ func TestReconcileRequest(t *testing.T) { ).WithIndex(&corev1.Node{}, index.NodeProviderIDField, index.NodeByProviderID).Build() r := &Reconciler{ - Client: clientFake, - Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), clientFake, scheme.Scheme, client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), + Client: clientFake, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), clientFake, scheme.Scheme, client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), + ssaCache: ssa.NewCache(clientFake.Scheme()), } result, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: util.ObjectKey(&tc.machine)}) @@ -972,8 +974,9 @@ func TestMachineConditions(t *testing.T) { Build() r := &Reconciler{ - Client: clientFake, - Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), clientFake, scheme.Scheme, client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), + Client: clientFake, + Tracker: remote.NewTestClusterCacheTracker(logr.New(log.NullLogSink{}), clientFake, scheme.Scheme, client.ObjectKey{Name: testCluster.Name, Namespace: testCluster.Namespace}), + ssaCache: ssa.NewCache(clientFake.Scheme()), } _, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: util.ObjectKey(&machine)}) diff --git a/internal/controllers/machinedeployment/machinedeployment_controller.go b/internal/controllers/machinedeployment/machinedeployment_controller.go index 27a21b156cda..59c0103b67f9 100644 --- a/internal/controllers/machinedeployment/machinedeployment_controller.go +++ b/internal/controllers/machinedeployment/machinedeployment_controller.go @@ -70,6 +70,7 @@ type Reconciler struct { WatchFilterValue string recorder record.EventRecorder + ssaCache ssa.Cache } func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { @@ -106,6 +107,7 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt } r.recorder = mgr.GetEventRecorderFor("machinedeployment-controller") + r.ssaCache = ssa.NewCache(mgr.GetScheme()) return nil } diff --git a/internal/controllers/machinedeployment/machinedeployment_sync.go b/internal/controllers/machinedeployment/machinedeployment_sync.go index 707e3f33c77f..8e98ee8f5dba 100644 --- a/internal/controllers/machinedeployment/machinedeployment_sync.go +++ b/internal/controllers/machinedeployment/machinedeployment_sync.go @@ -40,6 +40,8 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/internal/controllers/machinedeployment/mdutil" + "sigs.k8s.io/cluster-api/internal/util/hash" + "sigs.k8s.io/cluster-api/internal/util/ssa" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" @@ -153,14 +155,12 @@ func (r *Reconciler) updateMachineSet(ctx context.Context, deployment *clusterv1 } // Update 
the MachineSet to propagate in-place mutable fields from the MachineDeployment. - patchOptions := []client.PatchOption{ - client.ForceOwnership, - client.FieldOwner(machineDeploymentManagerName), - } - if err := r.Client.Patch(ctx, updatedMS, client.Apply, patchOptions...); err != nil { + updatedObject, err := ssa.Patch(ctx, r.Client, machineDeploymentManagerName, updatedMS, ssa.WithCachingProxy{Cache: r.ssaCache, Original: ms}) + if err != nil { r.recorder.Eventf(deployment, corev1.EventTypeWarning, "FailedUpdate", "Failed to update MachineSet %s: %v", klog.KObj(updatedMS), err) return nil, errors.Wrapf(err, "failed to update MachineSet %s", klog.KObj(updatedMS)) } + updatedMS = updatedObject.(*clusterv1.MachineSet) log.V(4).Info("Updated MachineSet", "MachineSet", klog.KObj(updatedMS)) return updatedMS, nil @@ -178,11 +178,7 @@ func (r *Reconciler) createMachineSetAndWait(ctx context.Context, deployment *cl } // Create the MachineSet. - patchOptions := []client.PatchOption{ - client.ForceOwnership, - client.FieldOwner(machineDeploymentManagerName), - } - if err = r.Client.Patch(ctx, newMS, client.Apply, patchOptions...); err != nil { + if _, err := ssa.Patch(ctx, r.Client, machineDeploymentManagerName, newMS); err != nil { r.recorder.Eventf(deployment, corev1.EventTypeWarning, "FailedCreate", "Failed to create MachineSet %s: %v", klog.KObj(newMS), err) return nil, errors.Wrapf(err, "failed to create new MachineSet %s", klog.KObj(newMS)) } @@ -237,7 +233,7 @@ func (r *Reconciler) computeDesiredMachineSet(deployment *clusterv1.MachineDeplo // As a result, we use the hash of the machine template while ignoring all in-place mutable fields, i.e. the // machine template with only fields that could trigger a rollout for the machine-template-hash, making it // independent of the changes to any in-place mutable fields. - templateHash, err := mdutil.ComputeSpewHash(mdutil.MachineTemplateDeepCopyRolloutFields(&deployment.Spec.Template)) + templateHash, err := hash.Compute(mdutil.MachineTemplateDeepCopyRolloutFields(&deployment.Spec.Template)) if err != nil { return nil, errors.Wrap(err, "failed to compute desired MachineSet: failed to compute machine template hash") } diff --git a/internal/controllers/machinedeployment/mdutil/util.go b/internal/controllers/machinedeployment/mdutil/util.go index deba1edf90ec..c4957454153c 100644 --- a/internal/controllers/machinedeployment/mdutil/util.go +++ b/internal/controllers/machinedeployment/mdutil/util.go @@ -19,13 +19,10 @@ package mdutil import ( "fmt" - "hash" - "hash/fnv" "sort" "strconv" "strings" - "github.com/davecgh/go-spew/spew" "github.com/go-logr/logr" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" @@ -688,33 +685,6 @@ func CloneSelectorAndAddLabel(selector *metav1.LabelSelector, labelKey, labelVal return newSelector } -// SpewHashObject writes specified object to hash using the spew library -// which follows pointers and prints actual values of the nested objects -// ensuring the hash does not change when a pointer changes. -func SpewHashObject(hasher hash.Hash, objectToWrite interface{}) error { - hasher.Reset() - printer := spew.ConfigState{ - Indent: " ", - SortKeys: true, - DisableMethods: true, - SpewKeys: true, - } - - if _, err := printer.Fprintf(hasher, "%#v", objectToWrite); err != nil { - return fmt.Errorf("failed to write object to hasher") - } - return nil -} - -// ComputeSpewHash computes the hash of a MachineTemplateSpec using the spew library. 
-func ComputeSpewHash(objectToWrite interface{}) (uint32, error) { - machineTemplateSpecHasher := fnv.New32a() - if err := SpewHashObject(machineTemplateSpecHasher, objectToWrite); err != nil { - return 0, err - } - return machineTemplateSpecHasher.Sum32(), nil -} - // GetDeletingMachineCount gets the number of machines that are in the process of being deleted // in a machineList. func GetDeletingMachineCount(machineList *clusterv1.MachineList) int32 { diff --git a/internal/controllers/machineset/machineset_controller.go b/internal/controllers/machineset/machineset_controller.go index f865ab7b3417..5f6a1a457d8f 100644 --- a/internal/controllers/machineset/machineset_controller.go +++ b/internal/controllers/machineset/machineset_controller.go @@ -85,6 +85,7 @@ type Reconciler struct { // WatchFilterValue is the label value used to filter events prior to reconciliation. WatchFilterValue string + ssaCache ssa.Cache recorder record.EventRecorder } @@ -122,6 +123,7 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt } r.recorder = mgr.GetEventRecorderFor("machineset-controller") + r.ssaCache = ssa.NewCache(mgr.GetScheme()) return nil } @@ -393,14 +395,12 @@ func (r *Reconciler) syncMachines(ctx context.Context, machineSet *clusterv1.Mac // Update Machine to propagate in-place mutable fields from the MachineSet. updatedMachine := r.computeDesiredMachine(machineSet, m) - patchOptions := []client.PatchOption{ - client.ForceOwnership, - client.FieldOwner(machineSetManagerName), - } - if err := r.Client.Patch(ctx, updatedMachine, client.Apply, patchOptions...); err != nil { + updatedObject, err := ssa.Patch(ctx, r.Client, machineSetManagerName, updatedMachine, ssa.WithCachingProxy{Cache: r.ssaCache, Original: m}) + if err != nil { log.Error(err, "failed to update Machine", "Machine", klog.KObj(updatedMachine)) return errors.Wrapf(err, "failed to update Machine %q", klog.KObj(updatedMachine)) } + updatedMachine = updatedObject.(*clusterv1.Machine) machines[i] = updatedMachine infraMachine, err := external.Get(ctx, r.Client, &updatedMachine.Spec.InfrastructureRef, updatedMachine.Namespace) @@ -529,11 +529,7 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet, machine.Spec.InfrastructureRef = *infraRef // Create the Machine. 
- patchOptions := []client.PatchOption{ - client.ForceOwnership, - client.FieldOwner(machineSetManagerName), - } - if err := r.Client.Patch(ctx, machine, client.Apply, patchOptions...); err != nil { + if _, err := ssa.Patch(ctx, r.Client, machineSetManagerName, machine); err != nil { log.Error(err, "Error while creating a machine") r.recorder.Eventf(ms, corev1.EventTypeWarning, "FailedCreate", "Failed to create machine: %v", err) errs = append(errs, err) @@ -676,11 +672,7 @@ func (r *Reconciler) updateExternalObject(ctx context.Context, obj client.Object updatedObject.SetLabels(machineLabelsFromMachineSet(machineSet)) updatedObject.SetAnnotations(machineAnnotationsFromMachineSet(machineSet)) - patchOptions := []client.PatchOption{ - client.ForceOwnership, - client.FieldOwner(machineSetManagerName), - } - if err := r.Client.Patch(ctx, updatedObject, client.Apply, patchOptions...); err != nil { + if _, err := ssa.Patch(ctx, r.Client, machineSetManagerName, updatedObject, ssa.WithCachingProxy{Cache: r.ssaCache, Original: obj}); err != nil { return errors.Wrapf(err, "failed to update %s", klog.KObj(obj)) } return nil diff --git a/internal/controllers/machineset/machineset_controller_test.go b/internal/controllers/machineset/machineset_controller_test.go index e767f930caef..485a6d29d0f0 100644 --- a/internal/controllers/machineset/machineset_controller_test.go +++ b/internal/controllers/machineset/machineset_controller_test.go @@ -1175,7 +1175,7 @@ func TestMachineSetReconciler_syncMachines(t *testing.T) { // Run syncMachines to clean up managed fields and have proper field ownership // for Machines, InfrastructureMachines and BootstrapConfigs. - reconciler := &Reconciler{Client: env} + reconciler := &Reconciler{Client: env, ssaCache: ssa.NewCache(env.GetScheme())} g.Expect(reconciler.syncMachines(ctx, ms, machines)).To(Succeed()) // The inPlaceMutatingMachine should have cleaned up managed fields. diff --git a/internal/controllers/topology/cluster/cluster_controller.go b/internal/controllers/topology/cluster/cluster_controller.go index f47e3b5e7cdb..d25dbfcea7aa 100644 --- a/internal/controllers/topology/cluster/cluster_controller.go +++ b/internal/controllers/topology/cluster/cluster_controller.go @@ -47,6 +47,7 @@ import ( "sigs.k8s.io/cluster-api/internal/hooks" tlog "sigs.k8s.io/cluster-api/internal/log" runtimeclient "sigs.k8s.io/cluster-api/internal/runtime/client" + "sigs.k8s.io/cluster-api/internal/util/ssa" "sigs.k8s.io/cluster-api/internal/webhooks" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" @@ -118,7 +119,7 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt r.patchEngine = patches.NewEngine(r.RuntimeClient) r.recorder = mgr.GetEventRecorderFor("topology/cluster") if r.patchHelperFactory == nil { - r.patchHelperFactory = serverSideApplyPatchHelperFactory(r.Client) + r.patchHelperFactory = serverSideApplyPatchHelperFactory(r.Client, ssa.NewCache(mgr.GetScheme())) } return nil } @@ -394,9 +395,9 @@ func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Clu } // serverSideApplyPatchHelperFactory makes use of managed fields provided by server side apply and is used by the controller. 
-func serverSideApplyPatchHelperFactory(c client.Client) structuredmerge.PatchHelperFactoryFunc { +func serverSideApplyPatchHelperFactory(c client.Client, ssaCache ssa.Cache) structuredmerge.PatchHelperFactoryFunc { return func(ctx context.Context, original, modified client.Object, opts ...structuredmerge.HelperOption) (structuredmerge.PatchHelper, error) { - return structuredmerge.NewServerSidePatchHelper(ctx, original, modified, c, opts...) + return structuredmerge.NewServerSidePatchHelper(ctx, original, modified, c, ssaCache, opts...) } } diff --git a/internal/controllers/topology/cluster/reconcile_state_test.go b/internal/controllers/topology/cluster/reconcile_state_test.go index cbb675989c63..f7c074a93b2a 100644 --- a/internal/controllers/topology/cluster/reconcile_state_test.go +++ b/internal/controllers/topology/cluster/reconcile_state_test.go @@ -47,6 +47,7 @@ import ( "sigs.k8s.io/cluster-api/internal/hooks" fakeruntimeclient "sigs.k8s.io/cluster-api/internal/runtime/client/fake" "sigs.k8s.io/cluster-api/internal/test/builder" + "sigs.k8s.io/cluster-api/internal/util/ssa" ) var ( @@ -87,7 +88,7 @@ func TestReconcileShim(t *testing.T) { r := Reconciler{ Client: env, APIReader: env.GetAPIReader(), - patchHelperFactory: serverSideApplyPatchHelperFactory(env), + patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache(env.GetScheme())), } err = r.reconcileClusterShim(ctx, s) g.Expect(err).ToNot(HaveOccurred()) @@ -130,7 +131,7 @@ func TestReconcileShim(t *testing.T) { r := Reconciler{ Client: env, APIReader: env.GetAPIReader(), - patchHelperFactory: serverSideApplyPatchHelperFactory(env), + patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache(env.GetScheme())), } err = r.reconcileClusterShim(ctx, s) g.Expect(err).ToNot(HaveOccurred()) @@ -180,7 +181,7 @@ func TestReconcileShim(t *testing.T) { r := Reconciler{ Client: env, APIReader: env.GetAPIReader(), - patchHelperFactory: serverSideApplyPatchHelperFactory(env), + patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache(env.GetScheme())), } err = r.reconcileClusterShim(ctx, s) g.Expect(err).ToNot(HaveOccurred()) @@ -231,7 +232,7 @@ func TestReconcileShim(t *testing.T) { r := Reconciler{ Client: env, APIReader: env.GetAPIReader(), - patchHelperFactory: serverSideApplyPatchHelperFactory(env), + patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache(env.GetScheme())), } err = r.reconcileClusterShim(ctx, s) g.Expect(err).ToNot(HaveOccurred()) @@ -272,7 +273,7 @@ func TestReconcileShim(t *testing.T) { r := Reconciler{ Client: nil, APIReader: env.GetAPIReader(), - patchHelperFactory: serverSideApplyPatchHelperFactory(nil), + patchHelperFactory: serverSideApplyPatchHelperFactory(nil, ssa.NewCache(env.GetScheme())), } err = r.reconcileClusterShim(ctx, s) g.Expect(err).ToNot(HaveOccurred()) @@ -962,7 +963,7 @@ func TestReconcileCluster(t *testing.T) { r := Reconciler{ Client: env, - patchHelperFactory: serverSideApplyPatchHelperFactory(env), + patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache(env.GetScheme())), recorder: env.GetEventRecorderFor("test"), } err = r.reconcileCluster(ctx, s) @@ -1087,7 +1088,7 @@ func TestReconcileInfrastructureCluster(t *testing.T) { r := Reconciler{ Client: env, - patchHelperFactory: serverSideApplyPatchHelperFactory(env), + patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache(env.GetScheme())), recorder: env.GetEventRecorderFor("test"), } err = r.reconcileInfrastructureCluster(ctx, s) @@ -1341,7 +1342,7 
@@ func TestReconcileControlPlane(t *testing.T) { r := Reconciler{ Client: env, - patchHelperFactory: serverSideApplyPatchHelperFactory(env), + patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache(env.GetScheme())), recorder: env.GetEventRecorderFor("test"), } @@ -1601,7 +1602,7 @@ func TestReconcileControlPlaneMachineHealthCheck(t *testing.T) { r := Reconciler{ Client: env, - patchHelperFactory: serverSideApplyPatchHelperFactory(env), + patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache(env.GetScheme())), recorder: env.GetEventRecorderFor("test"), } @@ -1850,7 +1851,7 @@ func TestReconcileMachineDeployments(t *testing.T) { r := Reconciler{ Client: env, - patchHelperFactory: serverSideApplyPatchHelperFactory(env), + patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache(env.GetScheme())), recorder: env.GetEventRecorderFor("test"), } err = r.reconcileMachineDeployments(ctx, s) @@ -2329,7 +2330,7 @@ func TestReconcileReferencedObjectSequences(t *testing.T) { r := Reconciler{ Client: env, - patchHelperFactory: serverSideApplyPatchHelperFactory(env), + patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache(env.GetScheme())), recorder: env.GetEventRecorderFor("test"), } @@ -2595,7 +2596,7 @@ func TestReconcileMachineDeploymentMachineHealthCheck(t *testing.T) { r := Reconciler{ Client: env, - patchHelperFactory: serverSideApplyPatchHelperFactory(env), + patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache(env.GetScheme())), recorder: env.GetEventRecorderFor("test"), } @@ -2741,7 +2742,7 @@ func TestReconciler_reconcileMachineHealthCheck(t *testing.T) { r := Reconciler{ Client: env, - patchHelperFactory: serverSideApplyPatchHelperFactory(env), + patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache(env.GetScheme())), recorder: env.GetEventRecorderFor("test"), } if tt.current != nil { diff --git a/internal/controllers/topology/cluster/structuredmerge/dryrun.go b/internal/controllers/topology/cluster/structuredmerge/dryrun.go index d7c183c362dc..ac6221d243f7 100644 --- a/internal/controllers/topology/cluster/structuredmerge/dryrun.go +++ b/internal/controllers/topology/cluster/structuredmerge/dryrun.go @@ -34,6 +34,8 @@ import ( type dryRunSSAPatchInput struct { client client.Client + // ssaCache caches SSA request to allow us to only run SSA when we actually have to. + ssaCache ssa.Cache // originalUnstructured contains the current state of the object. // Note: We will run SSA dry-run on originalUnstructured and modifiedUnstructured and then compare them. originalUnstructured *unstructured.Unstructured @@ -46,6 +48,22 @@ type dryRunSSAPatchInput struct { // dryRunSSAPatch uses server side apply dry run to determine if the operation is going to change the actual object. func dryRunSSAPatch(ctx context.Context, dryRunCtx *dryRunSSAPatchInput) (bool, bool, error) { + // Compute a request identifier. + // The identifier is unique for a specific request to ensure we don't have to re-run the request + // once we found out that it would not produce a diff. + // The identifier consists of: gvk, namespace and name of originalUnstructured and a hash of modifiedUnstructured. + // This ensures that we re-run the request as soon as either original or modified changes. 
+ requestIdentifier, err := ssa.ComputeRequestIdentifier(dryRunCtx.client.Scheme(), dryRunCtx.originalUnstructured, dryRunCtx.modifiedUnstructured) + if err != nil { + return false, false, err + } + + // Check if we already ran this request before by checking if the cache already contains this identifier. + // Note: We only add an identifier to the cache if the result of the dry run was no diff. + if exists := dryRunCtx.ssaCache.Has(requestIdentifier); exists { + return false, false, nil + } + // For dry run we use the same options as for the intent but with adding metadata.managedFields // to ensure that changes to ownership are detected. filterObjectInput := &ssa.FilterObjectInput{ @@ -62,7 +80,7 @@ func dryRunSSAPatch(ctx context.Context, dryRunCtx *dryRunSSAPatchInput) (bool, } // Do a server-side apply dry-run with modifiedUnstructured to get the updated object. - err := dryRunCtx.client.Patch(ctx, dryRunCtx.modifiedUnstructured, client.Apply, client.DryRunAll, client.FieldOwner(TopologyManagerName), client.ForceOwnership) + err = dryRunCtx.client.Patch(ctx, dryRunCtx.modifiedUnstructured, client.Apply, client.DryRunAll, client.FieldOwner(TopologyManagerName), client.ForceOwnership) if err != nil { // This catches errors like metadata.uid changes. return false, false, errors.Wrap(err, "server side apply dry-run failed for modified object") @@ -147,6 +165,11 @@ func dryRunSSAPatch(ctx context.Context, dryRunCtx *dryRunSSAPatchInput) (bool, hasChanges := len(diff.Object) > 0 _, hasSpecChanges := diff.Object["spec"] + // If there is no diff add the request identifier to the cache. + if !hasChanges { + dryRunCtx.ssaCache.Add(requestIdentifier) + } + return hasChanges, hasSpecChanges, nil } diff --git a/internal/controllers/topology/cluster/structuredmerge/serversidepathhelper.go b/internal/controllers/topology/cluster/structuredmerge/serversidepathhelper.go index 7d1a2b617cf9..c0fed6646279 100644 --- a/internal/controllers/topology/cluster/structuredmerge/serversidepathhelper.go +++ b/internal/controllers/topology/cluster/structuredmerge/serversidepathhelper.go @@ -39,7 +39,7 @@ type serverSidePatchHelper struct { } // NewServerSidePatchHelper returns a new PatchHelper using server side apply. -func NewServerSidePatchHelper(ctx context.Context, original, modified client.Object, c client.Client, opts ...HelperOption) (PatchHelper, error) { +func NewServerSidePatchHelper(ctx context.Context, original, modified client.Object, c client.Client, ssaCache ssa.Cache, opts ...HelperOption) (PatchHelper, error) { // Create helperOptions for filtering the original and modified objects to the desired intent. helperOptions := newHelperOptions(modified, opts...) 
@@ -99,6 +99,7 @@ func NewServerSidePatchHelper(ctx context.Context, original, modified client.Obj var err error hasChanges, hasSpecChanges, err = dryRunSSAPatch(ctx, &dryRunSSAPatchInput{ client: c, + ssaCache: ssaCache, originalUnstructured: originalUnstructured, modifiedUnstructured: modifiedUnstructured.DeepCopy(), helperOptions: helperOptions, diff --git a/internal/controllers/topology/cluster/structuredmerge/serversidepathhelper_test.go b/internal/controllers/topology/cluster/structuredmerge/serversidepathhelper_test.go index 83127aed2673..f80be91a42d8 100644 --- a/internal/controllers/topology/cluster/structuredmerge/serversidepathhelper_test.go +++ b/internal/controllers/topology/cluster/structuredmerge/serversidepathhelper_test.go @@ -42,6 +42,7 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1" "sigs.k8s.io/cluster-api/internal/test/builder" + "sigs.k8s.io/cluster-api/internal/util/ssa" "sigs.k8s.io/cluster-api/util/patch" ) @@ -72,7 +73,7 @@ func TestServerSideApply(t *testing.T) { var original *unstructured.Unstructured modified := obj.DeepCopy() - p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient()) + p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssa.NewCache(env.GetScheme())) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(BeTrue()) g.Expect(p0.HasSpecChanges()).To(BeTrue()) @@ -83,7 +84,7 @@ func TestServerSideApply(t *testing.T) { var original *clusterv1.MachineDeployment modified := obj.DeepCopy() - p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient()) + p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssa.NewCache(env.GetScheme())) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(BeTrue()) g.Expect(p0.HasSpecChanges()).To(BeTrue()) @@ -92,7 +93,7 @@ func TestServerSideApply(t *testing.T) { g := NewWithT(t) // Create a patch helper with original == nil and modified == obj, ensure this is detected as operation that triggers changes. - p0, err := NewServerSidePatchHelper(ctx, nil, obj.DeepCopy(), env.GetClient(), IgnorePaths{{"spec", "foo"}}) + p0, err := NewServerSidePatchHelper(ctx, nil, obj.DeepCopy(), env.GetClient(), ssa.NewCache(env.GetScheme()), IgnorePaths{{"spec", "foo"}}) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(BeTrue()) g.Expect(p0.HasSpecChanges()).To(BeTrue()) @@ -127,7 +128,7 @@ func TestServerSideApply(t *testing.T) { // Create a patch helper for a modified object with no changes. 
modified := obj.DeepCopy() - p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), IgnorePaths{{"spec", "foo"}}) + p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssa.NewCache(env.GetScheme()), IgnorePaths{{"spec", "foo"}}) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(BeFalse()) g.Expect(p0.HasSpecChanges()).To(BeFalse()) @@ -144,7 +145,7 @@ func TestServerSideApply(t *testing.T) { modified := obj.DeepCopy() g.Expect(unstructured.SetNestedField(modified.Object, "changed", "status", "foo")).To(Succeed()) - p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), IgnorePaths{{"spec", "foo"}}) + p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssa.NewCache(env.GetScheme()), IgnorePaths{{"spec", "foo"}}) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(BeFalse()) g.Expect(p0.HasSpecChanges()).To(BeFalse()) @@ -161,7 +162,7 @@ func TestServerSideApply(t *testing.T) { modified := obj.DeepCopy() g.Expect(unstructured.SetNestedField(modified.Object, "changed", "spec", "bar")).To(Succeed()) - p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), IgnorePaths{{"spec", "foo"}}) + p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssa.NewCache(env.GetScheme()), IgnorePaths{{"spec", "foo"}}) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(BeTrue()) g.Expect(p0.HasSpecChanges()).To(BeTrue()) @@ -178,7 +179,7 @@ func TestServerSideApply(t *testing.T) { modified := obj.DeepCopy() modified.SetLabels(map[string]string{"foo": "changed"}) - p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), IgnorePaths{{"spec", "foo"}}) + p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssa.NewCache(env.GetScheme()), IgnorePaths{{"spec", "foo"}}) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(BeTrue()) g.Expect(p0.HasSpecChanges()).To(BeFalse()) @@ -195,7 +196,7 @@ func TestServerSideApply(t *testing.T) { modified := obj.DeepCopy() modified.SetAnnotations(map[string]string{"foo": "changed"}) - p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), IgnorePaths{{"spec", "foo"}}) + p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssa.NewCache(env.GetScheme()), IgnorePaths{{"spec", "foo"}}) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(BeTrue()) g.Expect(p0.HasSpecChanges()).To(BeFalse()) @@ -219,7 +220,7 @@ func TestServerSideApply(t *testing.T) { }, }) - p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), IgnorePaths{{"spec", "foo"}}) + p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssa.NewCache(env.GetScheme()), IgnorePaths{{"spec", "foo"}}) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(BeTrue()) g.Expect(p0.HasSpecChanges()).To(BeFalse()) @@ -236,7 +237,7 @@ func TestServerSideApply(t *testing.T) { modified := obj.DeepCopy() g.Expect(unstructured.SetNestedField(modified.Object, "changed", "spec", "foo")).To(Succeed()) - p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), IgnorePaths{{"spec", "foo"}}) + p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssa.NewCache(env.GetScheme()), IgnorePaths{{"spec", "foo"}}) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(BeFalse()) g.Expect(p0.HasSpecChanges()).To(BeFalse()) @@ -269,7 +270,7 @@ func 
TestServerSideApply(t *testing.T) { // it here to be able to verify that managed field changes are ignored. This is the same situation as when // other controllers update .status (that is ignored) and the ServerSidePatchHelper then ignores the corresponding // managed field changes. - p0, err := NewServerSidePatchHelper(ctx, original, original, env.GetClient(), IgnorePaths{{"spec", "foo"}, {"spec", "bar"}}) + p0, err := NewServerSidePatchHelper(ctx, original, original, env.GetClient(), ssa.NewCache(env.GetScheme()), IgnorePaths{{"spec", "foo"}, {"spec", "bar"}}) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(BeFalse()) g.Expect(p0.HasSpecChanges()).To(BeFalse()) @@ -285,7 +286,7 @@ func TestServerSideApply(t *testing.T) { // Create a patch helper for a modified object with no changes to what previously applied by th topology manager. modified := obj.DeepCopy() - p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), IgnorePaths{{"spec", "foo"}}) + p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssa.NewCache(env.GetScheme()), IgnorePaths{{"spec", "foo"}}) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(BeFalse()) g.Expect(p0.HasSpecChanges()).To(BeFalse()) @@ -332,7 +333,7 @@ func TestServerSideApply(t *testing.T) { modified := obj.DeepCopy() g.Expect(unstructured.SetNestedField(modified.Object, "changed", "spec", "controlPlaneEndpoint", "host")).To(Succeed()) - p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), IgnorePaths{{"spec", "foo"}}) + p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssa.NewCache(env.GetScheme()), IgnorePaths{{"spec", "foo"}}) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(BeTrue()) g.Expect(p0.HasSpecChanges()).To(BeTrue()) @@ -370,7 +371,7 @@ func TestServerSideApply(t *testing.T) { g.Expect(unstructured.SetNestedField(modified.Object, "changed", "spec", "controlPlaneEndpoint", "host")).To(Succeed()) g.Expect(unstructured.SetNestedField(modified.Object, "changed", "spec", "bar")).To(Succeed()) - p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), IgnorePaths{{"spec", "foo"}}) + p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssa.NewCache(env.GetScheme()), IgnorePaths{{"spec", "foo"}}) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(BeTrue()) g.Expect(p0.HasSpecChanges()).To(BeFalse()) @@ -410,7 +411,7 @@ func TestServerSideApply(t *testing.T) { g.Expect(unstructured.SetNestedField(modified.Object, "changed", "spec", "controlPlaneEndpoint", "host")).To(Succeed()) g.Expect(unstructured.SetNestedField(modified.Object, "changed-by-topology-controller", "spec", "bar")).To(Succeed()) - p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), IgnorePaths{{"spec", "foo"}}) + p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssa.NewCache(env.GetScheme()), IgnorePaths{{"spec", "foo"}}) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(BeTrue()) g.Expect(p0.HasSpecChanges()).To(BeTrue()) @@ -458,7 +459,7 @@ func TestServerSideApply(t *testing.T) { g.Expect(env.GetAPIReader().Get(ctx, client.ObjectKeyFromObject(original), original)).To(Succeed()) // Create a patch helper for a modified object with which has no changes. 
- p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient()) + p0, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssa.NewCache(env.GetScheme())) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(BeFalse()) g.Expect(p0.HasSpecChanges()).To(BeFalse()) @@ -480,7 +481,7 @@ func TestServerSideApply(t *testing.T) { modified.SetUID("") // Create a patch helper which should fail because original's real UID changed. - _, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient()) + _, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssa.NewCache(env.GetScheme())) g.Expect(err).To(HaveOccurred()) }) t.Run("Error on object which does not exist (anymore) but was expected to get updated", func(t *testing.T) { @@ -497,7 +498,7 @@ func TestServerSideApply(t *testing.T) { original.SetUID("does-not-exist") // Create a patch helper which should fail because original does not exist. - _, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient()) + _, err := NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssa.NewCache(env.GetScheme())) g.Expect(err).To(HaveOccurred()) }) } @@ -532,7 +533,7 @@ func TestServerSideApplyWithDefaulting(t *testing.T) { // Setup webhook with the manager. // Note: The webhooks is not active yet, as the MutatingWebhookConfiguration will be deployed later. - mutatingWebhookConfiguration, err := setupWebhookWithManager(ns) + defaulter, mutatingWebhookConfiguration, err := setupWebhookWithManager(ns) g.Expect(err).ToNot(HaveOccurred()) // Calculate KubeadmConfigTemplate. @@ -637,8 +638,11 @@ func TestServerSideApplyWithDefaulting(t *testing.T) { // in multiple test runs, because after the first test run it has a resourceVersion set. mutatingWebhookConfiguration := mutatingWebhookConfiguration.DeepCopy() + // Create a cache to cache SSA requests. + ssaCache := ssa.NewCache(env.GetScheme()) + // Create the initial KubeadmConfigTemplate (with the old defaulting logic). - p0, err := NewServerSidePatchHelper(ctx, nil, kct.DeepCopy(), env.GetClient()) + p0, err := NewServerSidePatchHelper(ctx, nil, kct.DeepCopy(), env.GetClient(), ssaCache) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(BeTrue()) g.Expect(p0.HasSpecChanges()).To(BeTrue()) @@ -689,7 +693,7 @@ func TestServerSideApplyWithDefaulting(t *testing.T) { } // Apply modified. - p0, err = NewServerSidePatchHelper(ctx, original, modified, env.GetClient()) + p0, err = NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssaCache) g.Expect(err).ToNot(HaveOccurred()) g.Expect(p0.HasChanges()).To(Equal(tt.expectChanges)) g.Expect(p0.HasSpecChanges()).To(Equal(tt.expectSpecChanges)) @@ -721,6 +725,58 @@ func TestServerSideApplyWithDefaulting(t *testing.T) { g.Expect(specTemplateSpecFieldV1).ToNot(HaveKey("f:users")) } }, 2*time.Second).Should(Succeed()) + + if p0.HasChanges() { + // If there were changes the request should not be cached. + // Which means on the next call we should not hit the cache and thus + // send a request to the server. + // We verify this by checking the webhook call counter. + + // Get original. + original = kct.DeepCopy() + g.Expect(env.Get(ctx, client.ObjectKeyFromObject(original), original)) + + countBefore := defaulter.Counter + + // Apply modified again. + p0, err = NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssaCache) + g.Expect(err).ToNot(HaveOccurred()) + + // Expect no changes. 
+ g.Expect(p0.HasChanges()).To(BeFalse()) + g.Expect(p0.HasSpecChanges()).To(BeFalse()) + g.Expect(p0.Patch(ctx)).To(Succeed()) + + // Expect webhook to be called. + g.Expect(defaulter.Counter).To(Equal(countBefore+2), + "request should not have been cached and thus we expect the webhook to be called twice (once for original and once for modified)") + + // Note: Now the request is also cached, which we verify below. + } + + // If there were no changes the request is now cached. + // Which means on the next call we should only hit the cache and thus + // don't send a request to the server. + // We verify this by checking the webhook call counter. + + // Get original. + original = kct.DeepCopy() + g.Expect(env.Get(ctx, client.ObjectKeyFromObject(original), original)) + + countBefore := defaulter.Counter + + // Apply modified again. + p0, err = NewServerSidePatchHelper(ctx, original, modified, env.GetClient(), ssaCache) + g.Expect(err).ToNot(HaveOccurred()) + + // Expect no changes. + g.Expect(p0.HasChanges()).To(BeFalse()) + g.Expect(p0.HasSpecChanges()).To(BeFalse()) + g.Expect(p0.Patch(ctx)).To(Succeed()) + + // Expect webhook to not be called. + g.Expect(defaulter.Counter).To(Equal(countBefore), + "request should have been cached and thus the webhook not called") }) } } @@ -728,7 +784,7 @@ func TestServerSideApplyWithDefaulting(t *testing.T) { // setupWebhookWithManager configures the envtest manager / webhook server to serve the webhook. // It also calculates and returns the corresponding MutatingWebhookConfiguration. // Note: To activate the webhook, the MutatingWebhookConfiguration has to be deployed. -func setupWebhookWithManager(ns *corev1.Namespace) (*admissionv1.MutatingWebhookConfiguration, error) { +func setupWebhookWithManager(ns *corev1.Namespace) (*KubeadmConfigTemplateTestDefaulter, *admissionv1.MutatingWebhookConfiguration, error) { webhookServer := env.Manager.GetWebhookServer() // Calculate webhook host and path. @@ -741,13 +797,14 @@ func setupWebhookWithManager(ns *corev1.Namespace) (*admissionv1.MutatingWebhook // Serve KubeadmConfigTemplateTestDefaulter on the webhook server. // Note: This should only ever be called once with the same path, otherwise we get a panic. 
+ defaulter := &KubeadmConfigTemplateTestDefaulter{} webhookServer.Register(webhookPath, - admission.WithCustomDefaulter(&bootstrapv1.KubeadmConfigTemplate{}, &KubeadmConfigTemplateTestDefaulter{})) + admission.WithCustomDefaulter(&bootstrapv1.KubeadmConfigTemplate{}, defaulter)) // Calculate the MutatingWebhookConfiguration caBundle, err := os.ReadFile(filepath.Join(webhookServer.CertDir, webhookServer.CertName)) if err != nil { - return nil, err + return nil, nil, err } sideEffectNone := admissionv1.SideEffectClassNone @@ -785,20 +842,23 @@ func setupWebhookWithManager(ns *corev1.Namespace) (*admissionv1.MutatingWebhook }, }, } - return webhookConfig, nil + return defaulter, webhookConfig, nil } var _ webhook.CustomDefaulter = &KubeadmConfigTemplateTestDefaulter{} type KubeadmConfigTemplateTestDefaulter struct { + Counter int } -func (d KubeadmConfigTemplateTestDefaulter) Default(_ context.Context, obj runtime.Object) error { +func (d *KubeadmConfigTemplateTestDefaulter) Default(_ context.Context, obj runtime.Object) error { kct, ok := obj.(*bootstrapv1.KubeadmConfigTemplate) if !ok { return apierrors.NewBadRequest(fmt.Sprintf("expected a Cluster but got a %T", obj)) } + d.Counter++ + defaultKubeadmConfigTemplate(kct) return nil } diff --git a/internal/util/hash/hash.go b/internal/util/hash/hash.go new file mode 100644 index 000000000000..06eaabbd84dc --- /dev/null +++ b/internal/util/hash/hash.go @@ -0,0 +1,45 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package hash provides utils to calculate hashes. +package hash + +import ( + "fmt" + "hash/fnv" + + "github.com/davecgh/go-spew/spew" +) + +// Compute computes the hash of an object using the spew library. +// Note: spew follows pointers and prints actual values of the nested objects +// ensuring the hash does not change when a pointer changes. +func Compute(obj interface{}) (uint32, error) { + hasher := fnv.New32a() + + printer := spew.ConfigState{ + Indent: " ", + SortKeys: true, + DisableMethods: true, + SpewKeys: true, + } + + if _, err := printer.Fprintf(hasher, "%#v", obj); err != nil { + return 0, fmt.Errorf("failed to calculate hash") + } + + return hasher.Sum32(), nil +} diff --git a/internal/util/ssa/cache.go b/internal/util/ssa/cache.go new file mode 100644 index 000000000000..3c829ed356c5 --- /dev/null +++ b/internal/util/ssa/cache.go @@ -0,0 +1,116 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package ssa + +import ( + "fmt" + "time" + + "github.com/pkg/errors" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/apiutil" + + "sigs.k8s.io/cluster-api/internal/util/hash" +) + +const ( + // ttl is the duration for which we keep the keys in the cache. + ttl = 10 * time.Minute + + // expirationInterval is the interval in which we will remove expired keys + // from the cache. + expirationInterval = 10 * time.Hour +) + +// Cache caches SSA request results. +// Specifically we only use it to cache that a certain request +// doesn't have to be repeated anymore because there was no diff. +type Cache interface { + // Add adds the given key to the Cache. + // Note: keys expire after the ttl. + Add(key string) + + // Has checks if the given key (still) exists in the Cache. + // Note: keys expire after the ttl. + Has(key string) bool +} + +// NewCache creates a new cache. +func NewCache(scheme *runtime.Scheme) Cache { + r := &ssaCache{ + scheme: scheme, + Store: cache.NewTTLStore(func(obj interface{}) (string, error) { + // We only add strings to the cache, so it's safe to cast to string. + return obj.(string), nil + }, ttl), + } + go func() { + for { + // Call list to clear the cache of expired items. + r.List() + + time.Sleep(expirationInterval) + } + }() + return r +} + +type ssaCache struct { + cache.Store + scheme *runtime.Scheme +} + +// Add adds the given key to the Cache. +// Note: keys expire after the ttl. +func (r *ssaCache) Add(key string) { + // Note: We can ignore the error here because by only allowing strings + // and providing the corresponding keyFunc ourselves we can guarantee that + // the error never occurs. + _ = r.Store.Add(key) +} + +// Has checks if the given key (still) exists in the Cache. +// Note: keys expire after the ttl. +func (r *ssaCache) Has(key string) bool { + // Note: We can ignore the error here because GetByKey never returns an error. + _, exists, _ := r.Store.GetByKey(key) + return exists +} + +// ComputeRequestIdentifier computes a request identifier for the cache. +// The identifier is unique for a specific request to ensure we don't have to re-run the request +// once we found out that it would not produce a diff. +// The identifier consists of: gvk, namespace and name of the original object and a hash of the modified +// object. This ensures that we re-run the request as soon as either original or modified changes. +// We cannot use a hash of the original object as we don't want to re-run the request if e.g. the status of +// the original object changed. 
+func ComputeRequestIdentifier(scheme *runtime.Scheme, original, modified client.Object) (string, error) { + modifiedObjectHash, err := hash.Compute(modified) + if err != nil { + return "", errors.Wrapf(err, "failed to calculate request identifier: failed to compute hash for modified object") + } + + gvk, err := apiutil.GVKForObject(original, scheme) + if err != nil { + return "", errors.Wrapf(err, "failed to calculate request identifier: failed to get GroupVersionKind of original object %s", klog.KObj(original)) + } + + return fmt.Sprintf("%s.%s.%s.%d", gvk.String(), klog.KObj(original), original.GetResourceVersion(), modifiedObjectHash), nil +} diff --git a/internal/util/ssa/patch.go b/internal/util/ssa/patch.go new file mode 100644 index 000000000000..fa1127a6685c --- /dev/null +++ b/internal/util/ssa/patch.go @@ -0,0 +1,102 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package ssa + +import ( + "context" + + "github.com/pkg/errors" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/apiutil" +) + +// Option is the interface for configuration that modifies Options for a patch request. +type Option interface { + // ApplyToOptions applies this configuration to the given Options. + ApplyToOptions(*Options) +} + +// WithCachingProxy enables caching for the patch request. +// The original and modified object will be used to generate an +// identifier for the request. +// The cache will be used to cache the result of the request. +type WithCachingProxy struct { + Cache Cache + Original client.Object +} + +// ApplyToOptions applies WithCachingProxy to the given Options. +func (w WithCachingProxy) ApplyToOptions(in *Options) { + in.WithCachingProxy = true + in.Cache = w.Cache + in.Original = w.Original +} + +// Options contains the options for the Patch func. +type Options struct { + WithCachingProxy bool + Cache Cache + Original client.Object +} + +// Patch executes an SSA patch. +// If WithCachingProxy is set and the request didn't change the object +// we will cache this result, so subsequent calls don't have to run SSA again. +func Patch(ctx context.Context, c client.Client, fieldManager string, modified client.Object, opts ...Option) (client.Object, error) { + // Calculate the options. + options := &Options{} + for _, opt := range opts { + opt.ApplyToOptions(options) + } + + var requestIdentifier string + var err error + if options.WithCachingProxy { + // Check if the request is cached. + requestIdentifier, err = ComputeRequestIdentifier(c.Scheme(), options.Original, modified) + if err != nil { + return nil, errors.Wrapf(err, "failed to apply object") + } + if options.Cache.Has(requestIdentifier) { + // If the request is cached return the original object. 
+ return options.Original, nil + } + } + + gvk, err := apiutil.GVKForObject(modified, c.Scheme()) + if err != nil { + return nil, errors.Wrapf(err, "failed to apply object: failed to get GroupVersionKind of modified object %s", klog.KObj(modified)) + } + + patchOptions := []client.PatchOption{ + client.ForceOwnership, + client.FieldOwner(fieldManager), + } + if err := c.Patch(ctx, modified, client.Apply, patchOptions...); err != nil { + return nil, errors.Wrapf(err, "failed to apply %s %s", gvk.Kind, klog.KObj(modified)) + } + + if options.WithCachingProxy { + // If the SSA call did not update the object, add the request to the cache. + if options.Original.GetResourceVersion() == modified.GetResourceVersion() { + options.Cache.Add(requestIdentifier) + } + } + + return modified, nil +} diff --git a/internal/util/ssa/patch_test.go b/internal/util/ssa/patch_test.go new file mode 100644 index 000000000000..1fc02765140a --- /dev/null +++ b/internal/util/ssa/patch_test.go @@ -0,0 +1,82 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package ssa + +import ( + "testing" + + . "github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "sigs.k8s.io/controller-runtime/pkg/client" + + "sigs.k8s.io/cluster-api/internal/test/builder" +) + +func TestPatch(t *testing.T) { + g := NewWithT(t) + + // Create a namespace for running the test + ns, err := env.CreateNamespace(ctx, "ssa") + g.Expect(err).ToNot(HaveOccurred()) + + // Build the test object to work with. + initialObject := builder.TestInfrastructureCluster(ns.Name, "obj1").WithSpecFields(map[string]interface{}{ + "spec.controlPlaneEndpoint.host": "1.2.3.4", + "spec.controlPlaneEndpoint.port": int64(1234), + "spec.foo": "bar", + }).Build() + + fieldManager := "test-manager" + ssaCache := NewCache(env.GetScheme()) + + // Create the object + createObject := initialObject.DeepCopy() + _, err = Patch(ctx, env.GetClient(), fieldManager, createObject) + g.Expect(err).ToNot(HaveOccurred()) + + // Update the object and verify that the request was not cached as the object was changed. + // Get the original object. + originalObject := initialObject.DeepCopy() + g.Expect(env.GetClient().Get(ctx, client.ObjectKeyFromObject(originalObject), originalObject)) + // Modify the object + modifiedObject := initialObject.DeepCopy() + g.Expect(unstructured.SetNestedField(modifiedObject.Object, "baz", "spec", "foo")).To(Succeed()) + // Compute request identifier, so we can later verify that the update call was not cached. 
+ requestIdentifier, err := ComputeRequestIdentifier(env.GetScheme(), originalObject, modifiedObject) + g.Expect(err).ToNot(HaveOccurred()) + // Update the object + _, err = Patch(ctx, env.GetClient(), fieldManager, modifiedObject, WithCachingProxy{Cache: ssaCache, Original: originalObject}) + g.Expect(err).ToNot(HaveOccurred()) + // Verify that request was not cached (as it changed the object) + g.Expect(ssaCache.Has(requestIdentifier)).To(BeFalse()) + + // Repeat the same update and verify that the request was cached as the object was not changed. + // Get the original object. + originalObject = initialObject.DeepCopy() + g.Expect(env.GetClient().Get(ctx, client.ObjectKeyFromObject(originalObject), originalObject)) + // Modify the object + modifiedObject = initialObject.DeepCopy() + g.Expect(unstructured.SetNestedField(modifiedObject.Object, "baz", "spec", "foo")).To(Succeed()) + // Compute request identifier, so we can later verify that the update call was cached. + requestIdentifier, err = ComputeRequestIdentifier(env.GetScheme(), originalObject, modifiedObject) + g.Expect(err).ToNot(HaveOccurred()) + // Update the object + _, err = Patch(ctx, env.GetClient(), fieldManager, modifiedObject, WithCachingProxy{Cache: ssaCache, Original: originalObject}) + g.Expect(err).ToNot(HaveOccurred()) + // Verify that the request was cached (as it did not change the object) + g.Expect(ssaCache.Has(requestIdentifier)).To(BeTrue()) +} diff --git a/internal/util/ssa/suite_test.go b/internal/util/ssa/suite_test.go new file mode 100644 index 000000000000..4af5888da5b7 --- /dev/null +++ b/internal/util/ssa/suite_test.go @@ -0,0 +1,53 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package ssa + +import ( + "os" + "testing" + "time" + + . 
"github.com/onsi/gomega" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "k8s.io/apimachinery/pkg/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + ctrl "sigs.k8s.io/controller-runtime" + + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/internal/test/envtest" +) + +var ( + ctx = ctrl.SetupSignalHandler() + fakeScheme = runtime.NewScheme() + env *envtest.Environment +) + +func init() { + _ = clientgoscheme.AddToScheme(fakeScheme) + _ = clusterv1.AddToScheme(fakeScheme) + _ = apiextensionsv1.AddToScheme(fakeScheme) +} + +func TestMain(m *testing.M) { + SetDefaultEventuallyPollingInterval(100 * time.Millisecond) + SetDefaultEventuallyTimeout(30 * time.Second) + os.Exit(envtest.Run(ctx, envtest.RunInput{ + M: m, + SetupEnv: func(e *envtest.Environment) { env = e }, + })) +} From 9d657fc991cb7fdd332e6e55cd072b8be2d921e0 Mon Sep 17 00:00:00 2001 From: Stefan Bueringer Date: Thu, 2 Mar 2023 07:32:27 +0100 Subject: [PATCH 3/3] KCP: implement SSA caching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stefan Büringer buringerst@vmware.com --- .../internal/controllers/controller.go | 2 ++ .../internal/controllers/controller_test.go | 2 +- .../kubeadm/internal/controllers/helpers.go | 28 +++++++------------ 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/controlplane/kubeadm/internal/controllers/controller.go b/controlplane/kubeadm/internal/controllers/controller.go index 9be7586398d3..5f3e48128ef7 100644 --- a/controlplane/kubeadm/internal/controllers/controller.go +++ b/controlplane/kubeadm/internal/controllers/controller.go @@ -86,6 +86,7 @@ type KubeadmControlPlaneReconciler struct { // support SSA. This flag should be dropped after all affected tests are migrated // to envtest. disableInPlacePropagation bool + ssaCache ssa.Cache } func (r *KubeadmControlPlaneReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { @@ -113,6 +114,7 @@ func (r *KubeadmControlPlaneReconciler) SetupWithManager(ctx context.Context, mg r.controller = c r.recorder = mgr.GetEventRecorderFor("kubeadm-control-plane-controller") + r.ssaCache = ssa.NewCache(mgr.GetScheme()) if r.managementCluster == nil { if r.Tracker == nil { diff --git a/controlplane/kubeadm/internal/controllers/controller_test.go b/controlplane/kubeadm/internal/controllers/controller_test.go index 5e5598060482..ecec2ff54737 100644 --- a/controlplane/kubeadm/internal/controllers/controller_test.go +++ b/controlplane/kubeadm/internal/controllers/controller_test.go @@ -1574,7 +1574,7 @@ func TestKubeadmControlPlaneReconciler_syncMachines(t *testing.T) { // Run syncMachines to clean up managed fields and have proper field ownership // for Machines, InfrastructureMachines and KubeadmConfigs. - reconciler := &KubeadmControlPlaneReconciler{Client: env} + reconciler := &KubeadmControlPlaneReconciler{Client: env, ssaCache: ssa.NewCache(env.GetScheme())} g.Expect(reconciler.syncMachines(ctx, controlPlane)).To(Succeed()) // The inPlaceMutatingMachine should have cleaned up managed fields. 
diff --git a/controlplane/kubeadm/internal/controllers/helpers.go b/controlplane/kubeadm/internal/controllers/helpers.go index 03a70934f434..72ee3231dae0 100644 --- a/controlplane/kubeadm/internal/controllers/helpers.go +++ b/controlplane/kubeadm/internal/controllers/helpers.go @@ -38,6 +38,7 @@ import ( "sigs.k8s.io/cluster-api/controllers/external" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal" + "sigs.k8s.io/cluster-api/internal/util/ssa" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/certs" "sigs.k8s.io/cluster-api/util/conditions" @@ -305,12 +306,8 @@ func (r *KubeadmControlPlaneReconciler) updateExternalObject(ctx context.Context // Update annotations updatedObject.SetAnnotations(kcp.Spec.MachineTemplate.ObjectMeta.Annotations) - patchOptions := []client.PatchOption{ - client.ForceOwnership, - client.FieldOwner(kcpManagerName), - } - if err := r.Client.Patch(ctx, updatedObject, client.Apply, patchOptions...); err != nil { - return errors.Wrapf(err, "failed to update %s", klog.KObj(obj)) + if _, err := ssa.Patch(ctx, r.Client, kcpManagerName, updatedObject); err != nil { + return errors.Wrapf(err, "failed to update %s", obj.GetObjectKind().GroupVersionKind().Kind) } return nil } @@ -320,12 +317,8 @@ func (r *KubeadmControlPlaneReconciler) createMachine(ctx context.Context, kcp * if err != nil { return errors.Wrap(err, "failed to create Machine: failed to compute desired Machine") } - patchOptions := []client.PatchOption{ - client.ForceOwnership, - client.FieldOwner(kcpManagerName), - } - if err := r.Client.Patch(ctx, machine, client.Apply, patchOptions...); err != nil { - return errors.Wrap(err, "failed to create Machine: apply failed") + if _, err := ssa.Patch(ctx, r.Client, kcpManagerName, machine); err != nil { + return errors.Wrap(err, "failed to create Machine") } // Remove the annotation tracking that a remediation is in progress (the remediation completed when // the replacement machine has been created above). @@ -342,13 +335,12 @@ func (r *KubeadmControlPlaneReconciler) updateMachine(ctx context.Context, machi if err != nil { return nil, errors.Wrap(err, "failed to update Machine: failed to compute desired Machine") } - patchOptions := []client.PatchOption{ - client.ForceOwnership, - client.FieldOwner(kcpManagerName), - } - if err := r.Client.Patch(ctx, updatedMachine, client.Apply, patchOptions...); err != nil { - return nil, errors.Wrap(err, "failed to update Machine: apply failed") + + updatedObject, err := ssa.Patch(ctx, r.Client, kcpManagerName, updatedMachine, ssa.WithCachingProxy{Cache: r.ssaCache, Original: machine}) + if err != nil { + return nil, errors.Wrap(err, "failed to update Machine") } + updatedMachine = updatedObject.(*clusterv1.Machine) return updatedMachine, nil }
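
Editor's note (not part of the patch series): the controllers touched above (MachineDeployment, MachineSet, KCP) converge on the same pattern introduced in internal/util/ssa: create one ssa.Cache per controller in SetupWithManager and route every server-side apply through ssa.Patch, passing WithCachingProxy on in-place updates so that an apply which previously produced no diff is skipped on later reconciles; creates call ssa.Patch without the caching option, as in the patch. The sketch below only illustrates that calling pattern using the signatures added by this series. The ExampleReconciler type, the "example-manager" field manager, and the updateMachine helper are placeholders, and the sketch assumes it lives inside the cluster-api module, since internal/util/ssa is an internal package.

package example

import (
	"context"

	"github.com/pkg/errors"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/internal/util/ssa"
)

// ExampleReconciler stands in for the MachineSet/MachineDeployment/KCP reconcilers.
type ExampleReconciler struct {
	Client   client.Client
	ssaCache ssa.Cache
}

// SetupWithManager creates one SSA cache per controller from the manager's scheme,
// mirroring the r.ssaCache = ssa.NewCache(mgr.GetScheme()) calls in the patch.
func (r *ExampleReconciler) SetupWithManager(_ context.Context, mgr ctrl.Manager) error {
	r.ssaCache = ssa.NewCache(mgr.GetScheme())
	return nil
}

// updateMachine propagates in-place mutable fields through a cached SSA call.
func (r *ExampleReconciler) updateMachine(ctx context.Context, current, desired *clusterv1.Machine) (*clusterv1.Machine, error) {
	// ssa.Patch applies "desired" with client.Apply and ForceOwnership under the given
	// field manager. WithCachingProxy short-circuits the apply when an identical request
	// previously produced no diff (tracked via ComputeRequestIdentifier in the TTL cache).
	updatedObject, err := ssa.Patch(ctx, r.Client, "example-manager", desired,
		ssa.WithCachingProxy{Cache: r.ssaCache, Original: current})
	if err != nil {
		return nil, errors.Wrap(err, "failed to update Machine")
	}
	return updatedObject.(*clusterv1.Machine), nil
}

Because the request identifier combines the original object's GVK, namespace/name, and resourceVersion with a hash of the modified object, a change to either side naturally invalidates the cached "no diff" result, and entries also expire after the ttl defined in cache.go.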