Skip to content

Commit 9ecc85d

Browse files
fix: increment metrics.Failures counter on node reconciler evaluation error
Signed-off-by: mohanakatari119-bit <mohana.katari119@gmail.com>
1 parent 87fee94 commit 9ecc85d

2 files changed

Lines changed: 77 additions & 0 deletions

File tree

internal/controller/node_controller.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ func (r *RuleReadinessController) processNodeAgainstAllRules(ctx context.Context
148148
"node", node.Name, "rule", rule.Name)
149149
// Continue with other rules even if one fails
150150
r.recordNodeFailure(rule, node.Name, "EvaluationError", err.Error())
151+
metrics.Failures.WithLabelValues(rule.Name, "EvaluationError").Inc()
151152
}
152153

153154
// Persist the rule status

internal/controller/node_controller_test.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,13 @@ package controller
1818

1919
import (
2020
"context"
21+
"fmt"
2122
"sync/atomic"
2223
"time"
2324

2425
. "github.com/onsi/ginkgo/v2"
2526
. "github.com/onsi/gomega"
27+
dto "github.com/prometheus/client_model/go"
2628
corev1 "k8s.io/api/core/v1"
2729
apierrors "k8s.io/apimachinery/pkg/api/errors"
2830
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -36,6 +38,7 @@ import (
3638
"sigs.k8s.io/controller-runtime/pkg/reconcile"
3739

3840
nodereadinessiov1alpha1 "sigs.k8s.io/node-readiness-controller/api/v1alpha1"
41+
"sigs.k8s.io/node-readiness-controller/internal/metrics"
3942
)
4043

4144
var _ = Describe("Node Controller", func() {
@@ -937,4 +940,77 @@ var _ = Describe("Node Controller", func() {
937940
"Patch should not be called when taint removal is a no-op")
938941
})
939942
})
943+
944+
// Verifies that the node reconciler surfaces rule-evaluation errors through
// the metrics.Failures counter, labelled with the rule name and the
// "EvaluationError" reason.
Context("metrics.Failures counter in node reconciler", func() {
	var (
		ctx        context.Context
		testScheme *runtime.Scheme
	)

	// Each spec gets a fresh context and a scheme that knows both the core
	// types and the NodeReadinessRule API types.
	BeforeEach(func() {
		ctx = context.Background()
		testScheme = runtime.NewScheme()
		Expect(corev1.AddToScheme(testScheme)).To(Succeed())
		Expect(nodereadinessiov1alpha1.AddToScheme(testScheme)).To(Succeed())
	})

	It("should increment metrics.Failures when evaluateRuleForNode returns an error", func() {
		// The node has no taint yet and carries no "TestCondition", so the
		// rule's required condition is not met and evaluateRuleForNode is
		// expected to call addTaintBySpec. Patch is intercepted for Node
		// objects to return an error, forcing the failure path.
		node := &corev1.Node{
			ObjectMeta: metav1.ObjectMeta{Name: "metrics-fail-node"},
		}
		rule := &nodereadinessiov1alpha1.NodeReadinessRule{
			ObjectMeta: metav1.ObjectMeta{Name: "metrics-fail-rule"},
			Spec: nodereadinessiov1alpha1.NodeReadinessRuleSpec{
				NodeSelector: metav1.LabelSelector{},
				Conditions: []nodereadinessiov1alpha1.ConditionRequirement{
					{Type: "TestCondition", RequiredStatus: corev1.ConditionTrue},
				},
				Taint: corev1.Taint{
					Key:    "readiness.k8s.io/test",
					Effect: corev1.TaintEffectNoSchedule,
				},
				EnforcementMode: nodereadinessiov1alpha1.EnforcementModeContinuous,
			},
		}

		fc := fakeclient.NewClientBuilder().
			WithScheme(testScheme).
			WithObjects(node, rule).
			WithStatusSubresource(rule).
			WithInterceptorFuncs(interceptor.Funcs{
				Patch: func(ctx context.Context, c client.WithWatch, obj client.Object, patch client.Patch, opts ...client.PatchOption) error {
					// Only Node patches fail; every other object is
					// patched normally through the underlying client.
					if _, ok := obj.(*corev1.Node); ok {
						return apierrors.NewInternalError(fmt.Errorf("simulated patch failure"))
					}
					return c.Patch(ctx, obj, patch, opts...)
				},
			}).
			Build()

		controller := &RuleReadinessController{
			Client:        fc,
			Scheme:        testScheme,
			clientset:     fake.NewSimpleClientset(),
			ruleCache:     map[string]*nodereadinessiov1alpha1.NodeReadinessRule{rule.Name: rule},
			EventRecorder: record.NewFakeRecorder(10),
		}

		// failureCount returns the current value of
		// metrics.Failures{rule.Name, "EvaluationError"}. The Write error is
		// asserted rather than discarded so that a failed read cannot
		// masquerade as a counter value of zero.
		failureCount := func() float64 {
			m := &dto.Metric{}
			Expect(metrics.Failures.WithLabelValues(rule.Name, "EvaluationError").Write(m)).To(Succeed())
			return m.GetCounter().GetValue()
		}

		// Compare against a baseline instead of an absolute value: the
		// counter is package-level state that other specs may have touched.
		before := failureCount()

		controller.processNodeAgainstAllRules(ctx, node)

		after := failureCount()

		Expect(after).To(BeNumerically(">", before),
			"metrics.Failures{rule, EvaluationError} must increment when the node reconciler hits an evaluation error")
	})
})
9401016
})

0 commit comments

Comments
 (0)