Skip to content

Commit 339a0ac

Browse files
fix: skip stale evaluation updates on failure
1 parent 99f9ab0 commit 339a0ac

2 files changed

Lines changed: 121 additions & 16 deletions

File tree

internal/controller/node_controller.go

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,12 @@ func (r *RuleReadinessController) processNodeAgainstAllRules(ctx context.Context
143143
"rule", rule.Name,
144144
"ruleResourceVersion", rule.ResourceVersion)
145145

146-
if err := r.evaluateRuleForNode(ctx, rule, node); err != nil {
147-
log.Error(err, "Failed to evaluate rule for node",
146+
evalErr := r.evaluateRuleForNode(ctx, rule, node)
147+
if evalErr != nil {
148+
log.Error(evalErr, "Failed to evaluate rule for node",
148149
"node", node.Name, "rule", rule.Name)
149150
// Continue with other rules even if one fails
150-
r.recordNodeFailure(rule, node.Name, "EvaluationError", err.Error())
151+
r.recordNodeFailure(rule, node.Name, "EvaluationError", evalErr.Error())
151152
}
152153

153154
// Persist the rule status
@@ -164,20 +165,18 @@ func (r *RuleReadinessController) processNodeAgainstAllRules(ctx context.Context
164165

165166
patch := client.MergeFrom(latestRule.DeepCopy())
166167

167-
// Upsert the node's evaluation only when evaluateRuleForNode produced
168-
// one. On the failure path it returns before updateNodeEvaluationStatus
169-
// runs, so writing the zero value here would either clobber a valid
170-
// prior entry or append one with an empty NodeName, which the CRD
171-
// rejects (MinLength=1) and would fail the whole status patch — taking
172-
// the FailedNodes update below down with it.
173-
var currEval *readinessv1alpha1.NodeEvaluation
174-
for i := range rule.Status.NodeEvaluations {
175-
if rule.Status.NodeEvaluations[i].NodeName == node.Name {
176-
currEval = &rule.Status.NodeEvaluations[i]
177-
break
168+
// Upsert the node's evaluation only after a successful evaluation.
169+
// On the failure path evaluateRuleForNode returns before recording a
170+
// fresh NodeEvaluation, so this must leave any existing persisted
171+
// evaluation untouched and only persist FailedNodes below.
172+
if evalErr == nil {
173+
var currEval *readinessv1alpha1.NodeEvaluation
174+
for i := range rule.Status.NodeEvaluations {
175+
if rule.Status.NodeEvaluations[i].NodeName == node.Name {
176+
currEval = &rule.Status.NodeEvaluations[i]
177+
break
178+
}
178179
}
179-
}
180-
if currEval != nil {
181180
found := false
182181
for i := range latestRule.Status.NodeEvaluations {
183182
if latestRule.Status.NodeEvaluations[i].NodeName == node.Name {

internal/controller/node_controller_test.go

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1048,5 +1048,111 @@ var _ = Describe("Node Controller", func() {
10481048
Expect(persisted.Status.NodeEvaluations).To(ContainElement(
10491049
HaveField("NodeName", untouchedNode)))
10501050
})
1051+
1052+
It("preserves the existing target NodeEvaluation when recording a failure", func() {
1053+
ctx := context.Background()
1054+
testScheme := runtime.NewScheme()
1055+
Expect(corev1.AddToScheme(testScheme)).To(Succeed())
1056+
Expect(nodereadinessiov1alpha1.AddToScheme(testScheme)).To(Succeed())
1057+
1058+
const (
1059+
targetNode = "stale-eval-node"
1060+
targetRule = "stale-eval-rule"
1061+
targetTaint = "readiness.k8s.io/stale-eval-taint"
1062+
)
1063+
1064+
node := &corev1.Node{
1065+
ObjectMeta: metav1.ObjectMeta{
1066+
Name: targetNode,
1067+
Labels: map[string]string{"readiness-test": "stale-eval"},
1068+
},
1069+
Status: corev1.NodeStatus{
1070+
Conditions: []corev1.NodeCondition{
1071+
{Type: "Ready", Status: corev1.ConditionFalse},
1072+
},
1073+
},
1074+
}
1075+
1076+
persistedEval := nodereadinessiov1alpha1.NodeEvaluation{
1077+
NodeName: targetNode,
1078+
ConditionResults: []nodereadinessiov1alpha1.ConditionEvaluationResult{
1079+
{
1080+
Type: "Ready",
1081+
CurrentStatus: corev1.ConditionTrue,
1082+
RequiredStatus: corev1.ConditionTrue,
1083+
},
1084+
},
1085+
TaintStatus: nodereadinessiov1alpha1.TaintStatusAbsent,
1086+
LastEvaluationTime: metav1.Now(),
1087+
}
1088+
staleCachedEval := persistedEval
1089+
staleCachedEval.TaintStatus = nodereadinessiov1alpha1.TaintStatusPresent
1090+
1091+
rule := &nodereadinessiov1alpha1.NodeReadinessRule{
1092+
ObjectMeta: metav1.ObjectMeta{Name: targetRule},
1093+
Spec: nodereadinessiov1alpha1.NodeReadinessRuleSpec{
1094+
Conditions: []nodereadinessiov1alpha1.ConditionRequirement{
1095+
{Type: "Ready", RequiredStatus: corev1.ConditionTrue},
1096+
},
1097+
Taint: corev1.Taint{
1098+
Key: targetTaint,
1099+
Effect: corev1.TaintEffectNoSchedule,
1100+
},
1101+
NodeSelector: metav1.LabelSelector{
1102+
MatchLabels: map[string]string{"readiness-test": "stale-eval"},
1103+
},
1104+
EnforcementMode: nodereadinessiov1alpha1.EnforcementModeContinuous,
1105+
},
1106+
Status: nodereadinessiov1alpha1.NodeReadinessRuleStatus{
1107+
NodeEvaluations: []nodereadinessiov1alpha1.NodeEvaluation{persistedEval},
1108+
},
1109+
}
1110+
cachedRule := rule.DeepCopy()
1111+
cachedRule.Status.NodeEvaluations = []nodereadinessiov1alpha1.NodeEvaluation{staleCachedEval}
1112+
1113+
fc := fakeclient.NewClientBuilder().
1114+
WithScheme(testScheme).
1115+
WithObjects(node, rule).
1116+
WithStatusSubresource(rule).
1117+
WithInterceptorFuncs(interceptor.Funcs{
1118+
Patch: func(ctx context.Context, c client.WithWatch, obj client.Object, patch client.Patch, opts ...client.PatchOption) error {
1119+
if _, ok := obj.(*corev1.Node); ok && obj.GetName() == targetNode {
1120+
return apierrors.NewForbidden(corev1.Resource("nodes"), obj.GetName(), nil)
1121+
}
1122+
return c.Patch(ctx, obj, patch, opts...)
1123+
},
1124+
}).
1125+
Build()
1126+
1127+
controller := &RuleReadinessController{
1128+
Client: fc,
1129+
Scheme: testScheme,
1130+
clientset: fake.NewSimpleClientset(),
1131+
ruleCache: make(map[string]*nodereadinessiov1alpha1.NodeReadinessRule),
1132+
EventRecorder: record.NewFakeRecorder(10),
1133+
}
1134+
controller.updateRuleCache(ctx, cachedRule)
1135+
1136+
reconciler := &NodeReconciler{
1137+
Client: fc,
1138+
Scheme: testScheme,
1139+
Controller: controller,
1140+
}
1141+
1142+
_, err := reconciler.Reconcile(ctx, reconcile.Request{
1143+
NamespacedName: types.NamespacedName{Name: targetNode},
1144+
})
1145+
Expect(err).NotTo(HaveOccurred())
1146+
1147+
persisted := &nodereadinessiov1alpha1.NodeReadinessRule{}
1148+
Expect(fc.Get(ctx, types.NamespacedName{Name: targetRule}, persisted)).To(Succeed())
1149+
1150+
Expect(persisted.Status.FailedNodes).To(HaveLen(1))
1151+
Expect(persisted.Status.FailedNodes[0].NodeName).To(Equal(targetNode))
1152+
Expect(persisted.Status.NodeEvaluations).To(HaveLen(1))
1153+
Expect(persisted.Status.NodeEvaluations[0].NodeName).To(Equal(targetNode))
1154+
Expect(persisted.Status.NodeEvaluations[0].TaintStatus).To(Equal(
1155+
nodereadinessiov1alpha1.TaintStatusAbsent))
1156+
})
10511157
})
10521158
})

0 commit comments

Comments
 (0)