From d642098d0b883e84206baee957694fba3fa1d099 Mon Sep 17 00:00:00 2001 From: Daniele Rolando Date: Tue, 2 Sep 2025 15:23:47 -0700 Subject: [PATCH 1/5] Emit metric tracking empty responses from prometheus Signed-off-by: Daniele Rolando --- CHANGELOG.md | 1 + pkg/metricscollector/metricscollectors.go | 10 ++++++++++ pkg/metricscollector/opentelemetry.go | 11 +++++++++++ pkg/metricscollector/prommetrics.go | 14 ++++++++++++++ pkg/scalers/prometheus_scaler.go | 3 +++ 5 files changed, 39 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2261b5130a2..5dd14b5e0fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -90,6 +90,7 @@ To learn more about active deprecations, we recommend checking [GitHub Discussio - **Github Scaler**: Add support to control unlabeled job/runner matching ([#6900](https://github.com/kedacore/keda/issues/6900)) - **Metrics API Scaler**: Support AuthParams for authMode ([#6939](https://github.com/kedacore/keda/issues/6939)) - **Metrics API Scaler**: Support multiple auth methods simultaneously ([#6642](https://github.com/kedacore/keda/issues/6642)) +- **Prometheus Scaler**: Emit metric tracking empty responses from prometheus ([#7060](https://github.com/kedacore/keda/pull/7060)) - **Temporal Scaler**: Support custom tlsServerName ([#6820](https://github.com/kedacore/keda/pull/6820)) ### Fixes diff --git a/pkg/metricscollector/metricscollectors.go b/pkg/metricscollector/metricscollectors.go index 7a6d1055ffa..55d5332204d 100644 --- a/pkg/metricscollector/metricscollectors.go +++ b/pkg/metricscollector/metricscollectors.go @@ -79,6 +79,9 @@ type MetricsCollector interface { // RecordCloudEventQueueStatus record the number of cloudevents that are waiting for emitting RecordCloudEventQueueStatus(namespace string, value int) + + // RecordEmptyPrometheusMetricError counts the number of times a prometheus query returns an empty result + RecordEmptyPrometheusMetricError() } func NewMetricsCollectors(enablePrometheusMetrics bool, 
enableOpenTelemetryMetrics bool) { @@ -205,6 +208,13 @@ func RecordCloudEventQueueStatus(namespace string, value int) { } } +// RecordEmptyPrometheusMetricError counts the number of times a prometheus query returns an empty result +func RecordEmptyPrometheusMetricError() { + for _, element := range collectors { + element.RecordEmptyPrometheusMetricError() + } +} + // Returns the ServerMetrics object for GRPC Server metrics. Used to initialize the GRPC server with the proper intercepts // Currently, only Prometheus metrics are supported. func GetServerMetrics() *grpcprom.ServerMetrics { diff --git a/pkg/metricscollector/opentelemetry.go b/pkg/metricscollector/opentelemetry.go index 14e2aa32f47..560a5dbd2ba 100644 --- a/pkg/metricscollector/opentelemetry.go +++ b/pkg/metricscollector/opentelemetry.go @@ -34,6 +34,7 @@ var ( otCrdTotalsCounterDeprecated api.Int64UpDownCounter otTriggerRegisteredTotalsCounter api.Int64UpDownCounter otCrdRegisteredTotalsCounter api.Int64UpDownCounter + otEmptyPrometheusMetricError api.Int64Counter otelScalerMetricVals []OtelMetricFloat64Val otelScalerMetricsLatencyVals []OtelMetricFloat64Val @@ -135,6 +136,11 @@ func initMeters() { otLog.Error(err, msg) } + otEmptyPrometheusMetricError, err = meter.Int64Counter("keda.prometheus.metrics.empty.error", api.WithDescription("Number of times a prometheus query returns an empty result")) + if err != nil { + otLog.Error(err, msg) + } + _, err = meter.Float64ObservableGauge( "keda.scaler.metrics.value", api.WithDescription("The current value for each scaler's metric that would be used by the HPA in computing the target average"), @@ -506,3 +512,8 @@ func (o *OtelMetrics) RecordCloudEventQueueStatus(namespace string, value int) { otCloudEventQueueStatus.measurementOption = opt otCloudEventQueueStatusVals = append(otCloudEventQueueStatusVals, otCloudEventQueueStatus) } + +// RecordEmptyPrometheusMetricError counts the number of times a prometheus query returns an empty result +func (o 
*OtelMetrics) RecordEmptyPrometheusMetricError() { + otEmptyPrometheusMetricError.Add(context.Background(), 1, nil) +} diff --git a/pkg/metricscollector/prommetrics.go b/pkg/metricscollector/prommetrics.go index 92b60c17f26..ee22c8cab60 100644 --- a/pkg/metricscollector/prommetrics.go +++ b/pkg/metricscollector/prommetrics.go @@ -104,6 +104,14 @@ var ( }, []string{"namespace", "scaledJob"}, ) + emptyPrometheusMetricError = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: DefaultPromMetricsNamespace, + Subsystem: "prometheus", + Name: "metrics_empty_error_total", + Help: "Number of times a prometheus query returns an empty result", + }, + ) triggerRegistered = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: DefaultPromMetricsNamespace, @@ -168,6 +176,7 @@ func NewPromMetrics() *PromMetrics { metrics.Registry.MustRegister(triggerRegistered) metrics.Registry.MustRegister(crdRegistered) metrics.Registry.MustRegister(scaledJobErrors) + metrics.Registry.MustRegister(emptyPrometheusMetricError) metrics.Registry.MustRegister(buildInfo) @@ -328,6 +337,11 @@ func (p *PromMetrics) RecordCloudEventQueueStatus(namespace string, value int) { cloudeventQueueStatus.With(prometheus.Labels{"namespace": namespace}).Set(float64(value)) } +// RecordEmptyPrometheusMetricError counts the number of times a prometheus query returns an empty result +func (p *PromMetrics) RecordEmptyPrometheusMetricError() { + emptyPrometheusMetricError.Inc() +} + // Returns a grpcprom server Metrics object and registers the metrics. The object contains // interceptors to chain to the server so that all requests served are observed. 
Intended to be called // as part of initialization of metricscollector, hence why this function is not exported diff --git a/pkg/scalers/prometheus_scaler.go b/pkg/scalers/prometheus_scaler.go index 54a53540d6a..44dc2f05ca7 100644 --- a/pkg/scalers/prometheus_scaler.go +++ b/pkg/scalers/prometheus_scaler.go @@ -17,6 +17,7 @@ import ( "k8s.io/metrics/pkg/apis/external_metrics" kedav1alpha1 "github.com/kedacore/keda/v2/apis/keda/v1alpha1" + "github.com/kedacore/keda/v2/pkg/metricscollector" "github.com/kedacore/keda/v2/pkg/scalers/authentication" "github.com/kedacore/keda/v2/pkg/scalers/aws" "github.com/kedacore/keda/v2/pkg/scalers/azure" @@ -255,6 +256,7 @@ func (s *prometheusScaler) ExecutePromQuery(ctx context.Context) (float64, error if s.metadata.IgnoreNullValues { return 0, nil } + metricscollector.RecordEmptyPrometheusMetricError() return -1, fmt.Errorf("prometheus metrics 'prometheus' target may be lost, the result is empty") } else if len(result.Data.Result) > 1 { return -1, fmt.Errorf("prometheus query %s returned multiple elements", s.metadata.Query) @@ -265,6 +267,7 @@ func (s *prometheusScaler) ExecutePromQuery(ctx context.Context) (float64, error if s.metadata.IgnoreNullValues { return 0, nil } + metricscollector.RecordEmptyPrometheusMetricError() return -1, fmt.Errorf("prometheus metrics 'prometheus' target may be lost, the value list is empty") } else if valueLen < 2 { return -1, fmt.Errorf("prometheus query %s didn't return enough values", s.metadata.Query) From 3ad22036cb7d21d1d3db322bfac7dd30d4a444b3 Mon Sep 17 00:00:00 2001 From: Daniele Rolando Date: Mon, 29 Sep 2025 10:37:35 -0700 Subject: [PATCH 2/5] rename to empty_upstream_responses_total Signed-off-by: Daniele Rolando --- pkg/metricscollector/metricscollectors.go | 8 ++++---- pkg/metricscollector/opentelemetry.go | 10 +++++----- pkg/metricscollector/prommetrics.go | 14 +++++++------- pkg/scalers/prometheus_scaler.go | 4 ++-- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git 
a/pkg/metricscollector/metricscollectors.go b/pkg/metricscollector/metricscollectors.go index 55d5332204d..5c34e09d858 100644 --- a/pkg/metricscollector/metricscollectors.go +++ b/pkg/metricscollector/metricscollectors.go @@ -80,8 +80,8 @@ type MetricsCollector interface { // RecordCloudEventQueueStatus record the number of cloudevents that are waiting for emitting RecordCloudEventQueueStatus(namespace string, value int) - // RecordEmptyPrometheusMetricError counts the number of times a prometheus query returns an empty result - RecordEmptyPrometheusMetricError() + // RecordEmptyUpstreamResponse counts the number of times a query returns an empty result + RecordEmptyUpstreamResponse() } func NewMetricsCollectors(enablePrometheusMetrics bool, enableOpenTelemetryMetrics bool) { @@ -209,9 +209,9 @@ func RecordCloudEventQueueStatus(namespace string, value int) { } // RecordEmptyPrometheusMetricError counts the number of times a prometheus query returns an empty result -func RecordEmptyPrometheusMetricError() { +func RecordEmptyUpstreamResponse() { for _, element := range collectors { - element.RecordEmptyPrometheusMetricError() + element.RecordEmptyUpstreamResponse() } } diff --git a/pkg/metricscollector/opentelemetry.go b/pkg/metricscollector/opentelemetry.go index 560a5dbd2ba..702b387565c 100644 --- a/pkg/metricscollector/opentelemetry.go +++ b/pkg/metricscollector/opentelemetry.go @@ -34,7 +34,7 @@ var ( otCrdTotalsCounterDeprecated api.Int64UpDownCounter otTriggerRegisteredTotalsCounter api.Int64UpDownCounter otCrdRegisteredTotalsCounter api.Int64UpDownCounter - otEmptyPrometheusMetricError api.Int64Counter + otEmptyUpstreamResponses api.Int64Counter otelScalerMetricVals []OtelMetricFloat64Val otelScalerMetricsLatencyVals []OtelMetricFloat64Val @@ -136,7 +136,7 @@ func initMeters() { otLog.Error(err, msg) } - otEmptyPrometheusMetricError, err = meter.Int64Counter("keda.prometheus.metrics.empty.error", api.WithDescription("Number of times a prometheus query returns 
an empty result")) + otEmptyUpstreamResponses, err = meter.Int64Counter("keda.empty.upstream.responses", api.WithDescription("Number of times a query returns an empty result")) if err != nil { otLog.Error(err, msg) } @@ -513,7 +513,7 @@ func (o *OtelMetrics) RecordCloudEventQueueStatus(namespace string, value int) { otCloudEventQueueStatusVals = append(otCloudEventQueueStatusVals, otCloudEventQueueStatus) } -// RecordEmptyPrometheusMetricError counts the number of times a prometheus query returns an empty result -func (o *OtelMetrics) RecordEmptyPrometheusMetricError() { - otEmptyPrometheusMetricError.Add(context.Background(), 1, nil) +// RecordEmptyUpstreamResponse counts the number of times a query returns an empty result +func (o *OtelMetrics) RecordEmptyUpstreamResponse() { + otEmptyUpstreamResponses.Add(context.Background(), 1, nil) } diff --git a/pkg/metricscollector/prommetrics.go b/pkg/metricscollector/prommetrics.go index ee22c8cab60..357e2ec6f98 100644 --- a/pkg/metricscollector/prommetrics.go +++ b/pkg/metricscollector/prommetrics.go @@ -104,12 +104,12 @@ var ( }, []string{"namespace", "scaledJob"}, ) - emptyPrometheusMetricError = prometheus.NewCounter( + emptyUpstreamResponse = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: DefaultPromMetricsNamespace, Subsystem: "prometheus", - Name: "metrics_empty_error_total", - Help: "Number of times a prometheus query returns an empty result", + Name: "empty_upstream_responses_total", + Help: "Number of times a query returns an empty result", }, ) triggerRegistered = prometheus.NewGaugeVec( @@ -176,7 +176,7 @@ func NewPromMetrics() *PromMetrics { metrics.Registry.MustRegister(triggerRegistered) metrics.Registry.MustRegister(crdRegistered) metrics.Registry.MustRegister(scaledJobErrors) - metrics.Registry.MustRegister(emptyPrometheusMetricError) + metrics.Registry.MustRegister(emptyUpstreamResponse) metrics.Registry.MustRegister(buildInfo) @@ -337,9 +337,9 @@ func (p *PromMetrics) 
RecordCloudEventQueueStatus(namespace string, value int) { cloudeventQueueStatus.With(prometheus.Labels{"namespace": namespace}).Set(float64(value)) } -// RecordEmptyPrometheusMetricError counts the number of times a prometheus query returns an empty result -func (p *PromMetrics) RecordEmptyPrometheusMetricError() { - emptyPrometheusMetricError.Inc() +// RecordEmptyUpstreamResponse counts the number of times a query returns an empty result +func (p *PromMetrics) RecordEmptyUpstreamResponse() { + emptyUpstreamResponse.Inc() } // Returns a grpcprom server Metrics object and registers the metrics. The object contains diff --git a/pkg/scalers/prometheus_scaler.go b/pkg/scalers/prometheus_scaler.go index 44dc2f05ca7..41ea7624010 100644 --- a/pkg/scalers/prometheus_scaler.go +++ b/pkg/scalers/prometheus_scaler.go @@ -256,7 +256,7 @@ func (s *prometheusScaler) ExecutePromQuery(ctx context.Context) (float64, error if s.metadata.IgnoreNullValues { return 0, nil } - metricscollector.RecordEmptyPrometheusMetricError() + metricscollector.RecordEmptyUpstreamResponse() return -1, fmt.Errorf("prometheus metrics 'prometheus' target may be lost, the result is empty") } else if len(result.Data.Result) > 1 { return -1, fmt.Errorf("prometheus query %s returned multiple elements", s.metadata.Query) @@ -267,7 +267,7 @@ func (s *prometheusScaler) ExecutePromQuery(ctx context.Context) (float64, error if s.metadata.IgnoreNullValues { return 0, nil } - metricscollector.RecordEmptyPrometheusMetricError() + metricscollector.RecordEmptyUpstreamResponse() return -1, fmt.Errorf("prometheus metrics 'prometheus' target may be lost, the value list is empty") } else if valueLen < 2 { return -1, fmt.Errorf("prometheus query %s didn't return enough values", s.metadata.Query) From a5aea4a27f5189d80a3a499f340a0206a8ce8611 Mon Sep 17 00:00:00 2001 From: Daniele Rolando Date: Mon, 29 Sep 2025 10:39:22 -0700 Subject: [PATCH 3/5] update comment Signed-off-by: Daniele Rolando --- 
pkg/metricscollector/metricscollectors.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/metricscollector/metricscollectors.go b/pkg/metricscollector/metricscollectors.go index 5c34e09d858..5d9c72e3cfb 100644 --- a/pkg/metricscollector/metricscollectors.go +++ b/pkg/metricscollector/metricscollectors.go @@ -208,7 +208,7 @@ func RecordCloudEventQueueStatus(namespace string, value int) { } } -// RecordEmptyPrometheusMetricError counts the number of times a prometheus query returns an empty result +// RecordEmptyUpstreamResponse counts the number of times a query returns an empty result func RecordEmptyUpstreamResponse() { for _, element := range collectors { element.RecordEmptyUpstreamResponse() From 6b67a565128a74ccb04e97b3d3b04e0f3c965be4 Mon Sep 17 00:00:00 2001 From: drolando-stripe <102543345+drolando-stripe@users.noreply.github.com> Date: Fri, 3 Oct 2025 12:09:36 -0700 Subject: [PATCH 4/5] Update pkg/metricscollector/prommetrics.go Co-authored-by: Jorge Turrado Ferrero Signed-off-by: drolando-stripe <102543345+drolando-stripe@users.noreply.github.com> --- pkg/metricscollector/prommetrics.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/metricscollector/prommetrics.go b/pkg/metricscollector/prommetrics.go index 357e2ec6f98..29076963a79 100644 --- a/pkg/metricscollector/prommetrics.go +++ b/pkg/metricscollector/prommetrics.go @@ -107,7 +107,7 @@ var ( emptyUpstreamResponse = prometheus.NewCounter( prometheus.CounterOpts{ Namespace: DefaultPromMetricsNamespace, - Subsystem: "prometheus", + Subsystem: "scaler", Name: "empty_upstream_responses_total", Help: "Number of times a query returns an empty result", }, From 0f5e0709beb46af33df792126558aaa8ae71305b Mon Sep 17 00:00:00 2001 From: drolando-stripe <102543345+drolando-stripe@users.noreply.github.com> Date: Fri, 3 Oct 2025 12:09:50 -0700 Subject: [PATCH 5/5] Update pkg/metricscollector/opentelemetry.go Co-authored-by: Jorge Turrado Ferrero Signed-off-by: 
drolando-stripe <102543345+drolando-stripe@users.noreply.github.com> --- pkg/metricscollector/opentelemetry.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/metricscollector/opentelemetry.go b/pkg/metricscollector/opentelemetry.go index 702b387565c..dafcaa71936 100644 --- a/pkg/metricscollector/opentelemetry.go +++ b/pkg/metricscollector/opentelemetry.go @@ -136,7 +136,7 @@ func initMeters() { otLog.Error(err, msg) } - otEmptyUpstreamResponses, err = meter.Int64Counter("keda.empty.upstream.responses", api.WithDescription("Number of times a query returns an empty result")) + otEmptyUpstreamResponses, err = meter.Int64Counter("keda.scaler.empty.upstream.responses", api.WithDescription("Number of times a query returns an empty result")) if err != nil { otLog.Error(err, msg) }