From ee499b5a0358e9657787eb7f8e5cd819c9f96f5a Mon Sep 17 00:00:00 2001 From: Zhou JIANG Date: Mon, 18 Aug 2025 16:52:37 -0700 Subject: [PATCH 1/5] [SPARK-53325] Support Prometheus 2.0 text-based-format and best practices for metrics naming ### What changes were proposed in this pull request? This PR adds support for Prometheus text-based-format and best practices for metrics naming Existing format ``` metrics_jvm_bufferPool_direct_capacity_Number{type="gauges"} 98348 metrics_jvm_bufferPool_direct_capacity_Value{type="gauges"} 98348 metrics_jvm_bufferPool_direct_count_Number{type="gauges"} 41 metrics_jvm_bufferPool_direct_count_Value{type="gauges"} 41 metrics_kubernetes_client_http_response_latency_nanos_Count{type="histograms"} 26910 metrics_kubernetes_client_http_response_latency_nanos_Max{type="histograms"} 232417143 metrics_kubernetes_client_http_response_latency_nanos_Mean{type="histograms"} 1.1410164260725182E7 metrics_kubernetes_client_http_response_latency_nanos_Min{type="histograms"} 2931711 metrics_kubernetes_client_http_response_latency_nanos_50thPercentile{type="histograms"} 7559152.0 metrics_kubernetes_client_http_response_latency_nanos_75thPercentile{type="histograms"} 9440850.0 metrics_kubernetes_client_http_response_latency_nanos_95thPercentile{type="histograms"} 1.2576766E7 metrics_kubernetes_client_http_response_latency_nanos_98thPercentile{type="histograms"} 1.34034482E8 metrics_kubernetes_client_http_response_latency_nanos_99thPercentile{type="histograms"} 1.34034482E8 metrics_kubernetes_client_http_response_latency_nanos_999thPercentile{type="histograms"} 1.34034482E8 metrics_kubernetes_client_http_response_latency_nanos_StdDev{type="histograms"} 2.177784612259799E7 metrics_kubernetes_client_pods_get_Count{type="counters"} 8967 metrics_kubernetes_client_pods_get_MeanRate{type="counters"} 0.02678169644780033 metrics_kubernetes_client_pods_get_OneMinuteRate{type="counters"} 0.049758750361204154 
metrics_kubernetes_client_pods_get_FiveMinuteRate{type="counters"} 0.035255140329213855 metrics_kubernetes_client_pods_get_FifteenMinuteRate{type="counters"} 0.02931221844089468 ``` With this patch, the operator is able to export metrics in a format matching the Prometheus 2.0 recommended practice, like ``` # HELP jvm_bufferpool_direct_capacity Gauge metric # TYPE jvm_bufferpool_direct_capacity gauge jvm_bufferpool_direct_capacity 256092 # HELP jvm_bufferpool_direct_count Gauge metric # TYPE jvm_bufferpool_direct_count gauge jvm_bufferpool_direct_count 44 # HELP kubernetes_client_2xx_total Meter count # TYPE kubernetes_client_2xx_total counter kubernetes_client_2xx_total 130 # HELP kubernetes_client_http_response_latency Histogram metric # TYPE kubernetes_client_http_response_latency histogram kubernetes_client_http_response_latency_seconds_bucket{le="0.5"} 0.000104422 kubernetes_client_http_response_latency_seconds_bucket{le="0.75"} 0.000128422 kubernetes_client_http_response_latency_seconds_bucket{le="0.95"} 0.000139544 kubernetes_client_http_response_latency_seconds_bucket{le="0.98"} 0.000169124 kubernetes_client_http_response_latency_seconds_bucket{le="0.99"} 0.066452639 kubernetes_client_http_response_latency_seconds_count 2000 kubernetes_client_http_response_latency_seconds_sum 0.456670434 ``` ### Why are the changes needed? It is a Prometheus 2.0 best practice to use the text-based format with the necessary comments. Also, some common scrapers (like Datadog) rely on this metadata (e.g. # HELP and # TYPE) to parse metrics correctly. They may skip metrics if it is missing. ### Does this PR introduce _any_ user-facing change? New functionality becomes available (for the metrics format). ### How was this patch tested? CIs / curl on :19090/prometheus to validate the format ### Was this patch authored or co-authored using generative AI tooling?
No --- docs/config_properties.md | 2 + .../operator/config/SparkOperatorConf.java | 24 ++ .../metrics/PrometheusPullModelHandler.java | 291 +++++++++++++++++- .../PrometheusPullModelHandlerTest.java | 135 ++++++++ 4 files changed, 445 insertions(+), 7 deletions(-) create mode 100644 spark-operator/src/test/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandlerTest.java diff --git a/docs/config_properties.md b/docs/config_properties.md index c86ada23..94900b12 100644 --- a/docs/config_properties.md +++ b/docs/config_properties.md @@ -29,6 +29,8 @@ | spark.kubernetes.operator.metrics.clientMetricsEnabled | Boolean | true | false | Enable KubernetesClient metrics for measuring the HTTP traffic to the Kubernetes API Server. Since the metrics is collected via Okhttp interceptors, can be disabled when opt in customized interceptors. | | spark.kubernetes.operator.metrics.clientMetricsGroupByResponseCodeEnabled | Boolean | true | false | When enabled, additional metrics group by http response code group(1xx, 2xx, 3xx, 4xx, 5xx) received from API server will be added. Users can disable it when their monitoring system can combine lower level kubernetes.client.http.response.<3-digit-response-code> metrics. | | spark.kubernetes.operator.metrics.port | Integer | 19090 | false | The port used for checking metrics | + | spark.kubernetes.operator.enablePrometheusTextBasedFormat | Boolean | true | false | Whether or not to enable text-based format for Prometheus 2.0, as recommended by https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format | + | spark.kubernetes.operator.enableSanitizePrometheusMetricsName | Boolean | true | false | Whether or not to enable automatic name sanitizing for all metrics based on best-practice guide from Prometheus https://prometheus.io/docs/practices/naming/ | | spark.kubernetes.operator.health.probePort | Integer | 19091 | false | The port used for health/readiness check probe status. 
| | spark.kubernetes.operator.health.sentinelExecutorPoolSize | Integer | 3 | false | Size of executor service in Sentinel Managers to check the health of sentinel resources. | | spark.kubernetes.operator.health.sentinelResourceReconciliationDelaySeconds | Integer | 60 | true | Allowed max time(seconds) between spec update and reconciliation for sentinel resources. | diff --git a/spark-operator/src/main/java/org/apache/spark/k8s/operator/config/SparkOperatorConf.java b/spark-operator/src/main/java/org/apache/spark/k8s/operator/config/SparkOperatorConf.java index 3aca8cd5..f0c56b54 100644 --- a/spark-operator/src/main/java/org/apache/spark/k8s/operator/config/SparkOperatorConf.java +++ b/spark-operator/src/main/java/org/apache/spark/k8s/operator/config/SparkOperatorConf.java @@ -334,6 +334,30 @@ public final class SparkOperatorConf { .defaultValue(19090) .build(); + public static final ConfigOption EnablePrometheusTextBasedFormat = + ConfigOption.builder() + .key("spark.kubernetes.operator.enablePrometheusTextBasedFormat") + .enableDynamicOverride(false) + .description( + "Whether or not to enable text-based format for Prometheus 2.0, as " + + "recommended by " + + "https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format") + .typeParameterClass(Boolean.class) + .defaultValue(true) + .build(); + + public static final ConfigOption EnableSanitizePrometheusMetricsName = + ConfigOption.builder() + .key("spark.kubernetes.operator.enableSanitizePrometheusMetricsName") + .enableDynamicOverride(false) + .description( + "Whether or not to enable automatic name sanitizing for all metrics based on " + + "best-practice guide from Prometheus " + + "https://prometheus.io/docs/practices/naming/") + .typeParameterClass(Boolean.class) + .defaultValue(true) + .build(); + public static final ConfigOption OPERATOR_PROBE_PORT = ConfigOption.builder() .key("spark.kubernetes.operator.health.probePort") diff --git 
a/spark-operator/src/main/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandler.java b/spark-operator/src/main/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandler.java index d5a72a8c..f0f1be94 100644 --- a/spark-operator/src/main/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandler.java +++ b/spark-operator/src/main/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandler.java @@ -29,21 +29,38 @@ import java.util.Map; import java.util.Properties; +import com.codahale.metrics.Counter; +import com.codahale.metrics.Gauge; +import com.codahale.metrics.Histogram; +import com.codahale.metrics.Meter; import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.Snapshot; +import com.codahale.metrics.Timer; import com.sun.net.httpserver.HttpExchange; import com.sun.net.httpserver.HttpHandler; import jakarta.servlet.http.HttpServletRequest; +import lombok.Getter; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; +import org.apache.spark.k8s.operator.config.SparkOperatorConf; import org.apache.spark.metrics.sink.PrometheusServlet; /** Serves as simple Prometheus sink (pull model), presenting metrics snapshot as HttpHandler. 
*/ @Slf4j public class PrometheusPullModelHandler extends PrometheusServlet implements HttpHandler { private static final String EMPTY_RECORD_VALUE = "[]"; + @Getter private final MetricRegistry registry; + @Getter private final boolean enablePrometheusTextBasedFormat; + @Getter private final boolean enableSanitizePrometheusMetricsName; public PrometheusPullModelHandler(Properties properties, MetricRegistry registry) { super(properties, registry); + this.registry = registry; + this.enablePrometheusTextBasedFormat = + SparkOperatorConf.EnablePrometheusTextBasedFormat.getValue(); + this.enableSanitizePrometheusMetricsName = + SparkOperatorConf.EnableSanitizePrometheusMetricsName.getValue(); } @Override @@ -58,13 +75,21 @@ public void stop() { @Override public void handle(HttpExchange exchange) throws IOException { - HttpServletRequest httpServletRequest = null; - String value = getMetricsSnapshot(httpServletRequest); - sendMessage( - exchange, - HTTP_OK, - String.join("\n", filterNonEmptyRecords(value)), - Map.of("Content-Type", Collections.singletonList("text/plain;version=0.0.4"))); + if (SparkOperatorConf.EnablePrometheusTextBasedFormat.getValue()) { + sendMessage( + exchange, + HTTP_OK, + formatMetricsSnapshot(), + Map.of("Content-Type", Collections.singletonList("text/plain;version=0.0.4"))); + } else { + HttpServletRequest httpServletRequest = null; + String value = getMetricsSnapshot(httpServletRequest); + sendMessage( + exchange, + HTTP_OK, + String.join("\n", filterNonEmptyRecords(value)), + Map.of("Content-Type", Collections.singletonList("text/plain;version=0.0.4"))); + } } protected List filterNonEmptyRecords(String metricsSnapshot) { @@ -82,4 +107,256 @@ protected List filterNonEmptyRecords(String metricsSnapshot) { } return filteredRecords; } + + protected String formatMetricsSnapshot() { + Map gauges = registry.getGauges(); + Map counters = registry.getCounters(); + Map histograms = registry.getHistograms(); + Map meters = registry.getMeters(); + Map 
timers = registry.getTimers(); + + StringBuilder stringBuilder = new StringBuilder(); + + for (Map.Entry entry : gauges.entrySet()) { + appendIfNotEmpty(stringBuilder, formatGauge(entry.getKey(), entry.getValue())); + } + + // Counters + for (Map.Entry entry : counters.entrySet()) { + String name = sanitize(entry.getKey()) + "_total"; + Counter counter = entry.getValue(); + appendIfNotEmpty(stringBuilder, formatCounter(name, counter)); + } + + // Histograms + for (Map.Entry entry : histograms.entrySet()) { + appendIfNotEmpty(stringBuilder, formatHistogram(entry.getKey(), entry.getValue())); + } + + // Meters + for (Map.Entry entry : meters.entrySet()) { + appendIfNotEmpty(stringBuilder, formatMeter(entry.getKey(), entry.getValue())); + } + + // Timers (Meter + Histogram in nanoseconds) + for (Map.Entry entry : timers.entrySet()) { + appendIfNotEmpty(stringBuilder, formatTimer(entry.getKey(), entry.getValue())); + } + return stringBuilder.toString(); + } + + protected void appendIfNotEmpty(StringBuilder stringBuilder, String value) { + if (StringUtils.isNotEmpty(value)) { + stringBuilder.append(value); + } + } + + protected String formatGauge(String name, Gauge gauge) { + if (gauge != null + && gauge.getValue() != null + && !EMPTY_RECORD_VALUE.equals(gauge.getValue()) + && gauge.getValue() instanceof Number) { + String formattedName = sanitize(name); + return "# HELP " + + formattedName + + " Gauge metric\n" + + "# TYPE " + + formattedName + + " gauge\n" + + sanitize(formattedName) + + ' ' + + gauge.getValue() + + "\n\n"; + } + return null; + } + + protected String formatCounter(String name, Counter counter) { + if (counter != null) { + return "# HELP " + + name + + " Counter metric\n" + + "# TYPE " + + name + + " counter\n" + + name + + " " + + counter.getCount() + + "\n\n"; + } + return null; + } + + protected String formatHistogram(String name, Histogram histogram) { + if (histogram != null && histogram.getSnapshot() != null) { + StringBuilder stringBuilder = new 
StringBuilder(300); + String baseName = sanitize(name); + Snapshot snap = histogram.getSnapshot(); + long count = histogram.getCount(); + stringBuilder + .append("# HELP ") + .append(baseName) + .append(" Histogram metric\n# TYPE ") + .append(baseName) + .append(" histogram\n"); + boolean isNanosHistogram = baseName.contains("nanos"); + if (isNanosHistogram) { + baseName = nanosMetricsNameToSeconds(baseName); + } + appendBucket( + stringBuilder, + baseName, + "le=\"0.5\"", + isNanosHistogram ? nanosToSeconds(snap.getMedian()) : snap.getMean()); + appendBucket( + stringBuilder, + baseName, + "le=\"0.75\"", + isNanosHistogram ? nanosToSeconds(snap.get75thPercentile()) : snap.get75thPercentile()); + appendBucket( + stringBuilder, + baseName, + "le=\"0.95\"", + isNanosHistogram ? nanosToSeconds(snap.get95thPercentile()) : snap.get95thPercentile()); + appendBucket( + stringBuilder, + baseName, + "le=\"0.98\"", + isNanosHistogram ? nanosToSeconds(snap.get98thPercentile()) : snap.get98thPercentile()); + appendBucket( + stringBuilder, + baseName, + "le=\"0.99\"", + isNanosHistogram ? nanosToSeconds(snap.get99thPercentile()) : snap.get99thPercentile()); + double sum = + isNanosHistogram ? 
nanosToSeconds(snap.getMean() * count) : snap.getMean() * count; + stringBuilder + .append(baseName) + .append("_count ") + .append(count) + .append('\n') + .append(baseName) + .append("_sum ") + .append(sum) + .append("\n\n"); + return stringBuilder.toString(); + } + return null; + } + + protected String formatMeter(String name, Meter meter) { + if (meter != null) { + StringBuilder stringBuilder = new StringBuilder(200); + String baseName = sanitize(name); + stringBuilder + .append("# HELP ") + .append(baseName) + .append("_total Meter count\n# TYPE ") + .append(baseName) + .append("_total counter\n") + .append(baseName) + .append("_total ") + .append(meter.getCount()) + .append("\n\n# TYPE ") + .append(baseName) + .append("_rate gauge\n") + .append(baseName) + .append("_rate{interval=\"1m\"} ") + .append(meter.getOneMinuteRate()) + .append('\n') + .append(baseName) + .append("_rate{interval=\"5m\"} ") + .append(meter.getFiveMinuteRate()) + .append('\n') + .append(baseName) + .append("_rate{interval=\"15m\"} ") + .append(meter.getFifteenMinuteRate()) + .append("\n\n"); + return stringBuilder.toString(); + } + return null; + } + + protected String formatTimer(String name, Timer timer) { + if (timer != null && timer.getSnapshot() != null) { + StringBuilder stringBuilder = new StringBuilder(300); + String baseName = sanitize(name); + Snapshot snap = timer.getSnapshot(); + long count = timer.getCount(); + stringBuilder + .append("# HELP ") + .append(baseName) + .append("_duration_seconds Timer histogram\n# TYPE ") + .append(baseName) + .append("_duration_seconds histogram\n"); + appendBucket( + stringBuilder, + baseName + "_duration_seconds", + "le=\"0.5\"", + nanosToSeconds(snap.getMedian())); + appendBucket( + stringBuilder, + baseName + "_duration_seconds", + "le=\"0.75\"", + nanosToSeconds(snap.get75thPercentile())); + appendBucket( + stringBuilder, + baseName + "_duration_seconds", + "le=\"0.95\"", + nanosToSeconds(snap.get95thPercentile())); + appendBucket( + 
stringBuilder, + baseName + "_duration_seconds", + "le=\"0.98\"", + nanosToSeconds(snap.get98thPercentile())); + appendBucket( + stringBuilder, + baseName + "_duration_seconds", + "le=\"0.99\"", + nanosToSeconds(snap.get99thPercentile())); + stringBuilder + .append(baseName) + .append("_duration_seconds_count ") + .append(count) + .append('\n') + .append(baseName) + .append("_duration_seconds_sum ") + .append(nanosToSeconds(snap.getMean() * count)) + .append("\n\n# TYPE ") + .append(baseName) + .append("_calls_total counter\n") + .append(baseName) + .append("_calls_total ") + .append(count) + .append("\n\n"); + return stringBuilder.toString(); + } + return null; + } + + protected void appendBucket( + StringBuilder builder, String baseName, String leLabel, double value) { + builder + .append(baseName) + .append("_bucket{") + .append(leLabel) + .append("} ") + .append(value) + .append('\n'); + } + + protected double nanosToSeconds(double nanos) { + return nanos / 1_000_000_000.0; + } + + protected String sanitize(String name) { + if (enableSanitizePrometheusMetricsName) { + return name.replaceAll("[^a-zA-Z0-9_:]", "_").toLowerCase(); + } + return name; + } + + protected String nanosMetricsNameToSeconds(String name) { + return name.replaceAll("_nanos", "_seconds"); + } } diff --git a/spark-operator/src/test/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandlerTest.java b/spark-operator/src/test/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandlerTest.java new file mode 100644 index 00000000..406d0386 --- /dev/null +++ b/spark-operator/src/test/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandlerTest.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.k8s.operator.metrics; + +import static org.junit.jupiter.api.Assertions.*; + +import java.util.Objects; +import java.util.Properties; + +import com.codahale.metrics.Counter; +import com.codahale.metrics.Gauge; +import com.codahale.metrics.Histogram; +import com.codahale.metrics.Meter; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.Timer; +import org.junit.jupiter.api.Test; + +class PrometheusPullModelHandlerTest { + + @Test + void testFormatMetricsSnapshotIncludesGauge() throws Exception { + MetricRegistry registry = new MetricRegistry(); + registry.register("foo_gauge", (Gauge) () -> 42); + + PrometheusPullModelHandler handler = new PrometheusPullModelHandler(new Properties(), registry); + + String output = handler.formatMetricsSnapshot(); + assertTrue(output.contains("# TYPE foo_gauge gauge")); + assertTrue(output.contains("foo_gauge 42")); + } + + @Test + void testFormatMetricsSnapshotIncludesCounter() throws Exception { + MetricRegistry registry = new MetricRegistry(); + Counter counter = registry.counter("foo_counter"); + counter.inc(5); + + PrometheusPullModelHandler handler = new PrometheusPullModelHandler(new Properties(), registry); + + String output = handler.formatMetricsSnapshot(); + assertTrue(output.contains("# TYPE foo_counter_total counter")); + assertTrue(output.contains("foo_counter_total 5")); + } + + @Test 
+ void testFormatMetricsSnapshotIncludesHistogram() throws Exception { + MetricRegistry registry = new MetricRegistry(); + Histogram histogram = registry.histogram("foo_histogram"); + histogram.update(100); + histogram.update(200); + + PrometheusPullModelHandler handler = new PrometheusPullModelHandler(new Properties(), registry); + + String output = handler.formatMetricsSnapshot(); + + assertTrue(output.contains("# TYPE foo_histogram histogram")); + assertTrue(output.contains("foo_histogram_count 2")); + assertTrue(output.contains("foo_histogram_sum")); + } + + @Test + void testFormatMetricsSnapshotIncludesHistogramWithNanos() throws Exception { + MetricRegistry registry = new MetricRegistry(); + Histogram histogram = registry.histogram("foo_nanos_histogram"); + histogram.update(563682); + histogram.update(716252); + histogram.update(292098); + + PrometheusPullModelHandler handler = new PrometheusPullModelHandler(new Properties(), registry); + + String output = handler.formatMetricsSnapshot(); + + assertTrue(output.contains("# TYPE foo_nanos_histogram histogram")); + assertTrue(output.contains("foo_seconds_histogram_count 3")); + assertTrue(output.contains("foo_seconds_histogram_sum 0.001572032")); + } + + @Test + void testFormatMetricsSnapshotIncludesMeter() throws Exception { + MetricRegistry registry = new MetricRegistry(); + Meter meter = registry.meter("foo_meter"); + meter.mark(3); + + PrometheusPullModelHandler handler = new PrometheusPullModelHandler(new Properties(), registry); + + String output = handler.formatMetricsSnapshot(); + assertTrue(output.contains("# TYPE foo_meter_total counter")); + assertTrue(output.contains("foo_meter_total 3")); + assertTrue(output.contains("foo_meter_rate{interval=\"1m\"}")); + } + + @Test + void testFormatMetricsSnapshotIncludesTimer() throws Exception { + MetricRegistry registry = new MetricRegistry(); + Timer timer = registry.timer("foo_timer"); + + Timer.Context context = timer.time(); + Thread.sleep(10); + 
context.stop(); + + PrometheusPullModelHandler handler = new PrometheusPullModelHandler(new Properties(), registry); + + String output = handler.formatMetricsSnapshot(); + assertTrue(output.contains("# TYPE foo_timer_duration_seconds histogram")); + assertTrue(output.contains("foo_timer_duration_seconds_count 1")); + assertTrue(output.contains("foo_timer_duration_seconds_sum")); + } + + @Test + void testFormatMetricsSnapshotIncludesEmpty() throws Exception { + MetricRegistry registry = new MetricRegistry(); + registry.register("foo_gauge", (Gauge) () -> null); + PrometheusPullModelHandler handler = new PrometheusPullModelHandler(new Properties(), registry); + + String output = handler.formatMetricsSnapshot(); + assertFalse(output.contains("null")); + } +} From 35e9f6b4b43ee63aa0e7b0e1e766314919078053 Mon Sep 17 00:00:00 2001 From: Zhou JIANG Date: Mon, 25 Aug 2025 14:34:03 -0700 Subject: [PATCH 2/5] Fix style, invalid regex in name formatting, and remove duplicates in reference. --- .../k8s/operator/config/SparkOperatorConf.java | 4 ++-- .../metrics/PrometheusPullModelHandler.java | 17 +++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/spark-operator/src/main/java/org/apache/spark/k8s/operator/config/SparkOperatorConf.java b/spark-operator/src/main/java/org/apache/spark/k8s/operator/config/SparkOperatorConf.java index f0c56b54..478369fc 100644 --- a/spark-operator/src/main/java/org/apache/spark/k8s/operator/config/SparkOperatorConf.java +++ b/spark-operator/src/main/java/org/apache/spark/k8s/operator/config/SparkOperatorConf.java @@ -334,7 +334,7 @@ public final class SparkOperatorConf { .defaultValue(19090) .build(); - public static final ConfigOption EnablePrometheusTextBasedFormat = + public static final ConfigOption ENABLE_PROMETHEUS_TEXT_BASED_FORMAT = ConfigOption.builder() .key("spark.kubernetes.operator.enablePrometheusTextBasedFormat") .enableDynamicOverride(false) @@ -346,7 +346,7 @@ public final class SparkOperatorConf { 
.defaultValue(true) .build(); - public static final ConfigOption EnableSanitizePrometheusMetricsName = + public static final ConfigOption ENABLE_SANITIZED_PROMETHEUS_METRICS_NAME = ConfigOption.builder() .key("spark.kubernetes.operator.enableSanitizePrometheusMetricsName") .enableDynamicOverride(false) diff --git a/spark-operator/src/main/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandler.java b/spark-operator/src/main/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandler.java index f0f1be94..23786e3a 100644 --- a/spark-operator/src/main/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandler.java +++ b/spark-operator/src/main/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandler.java @@ -58,9 +58,9 @@ public PrometheusPullModelHandler(Properties properties, MetricRegistry registry super(properties, registry); this.registry = registry; this.enablePrometheusTextBasedFormat = - SparkOperatorConf.EnablePrometheusTextBasedFormat.getValue(); + SparkOperatorConf.ENABLE_PROMETHEUS_TEXT_BASED_FORMAT.getValue(); this.enableSanitizePrometheusMetricsName = - SparkOperatorConf.EnableSanitizePrometheusMetricsName.getValue(); + SparkOperatorConf.ENABLE_SANITIZED_PROMETHEUS_METRICS_NAME.getValue(); } @Override @@ -75,7 +75,7 @@ public void stop() { @Override public void handle(HttpExchange exchange) throws IOException { - if (SparkOperatorConf.EnablePrometheusTextBasedFormat.getValue()) { + if (enablePrometheusTextBasedFormat) { sendMessage( exchange, HTTP_OK, @@ -163,7 +163,7 @@ protected String formatGauge(String name, Gauge gauge) { + "# TYPE " + formattedName + " gauge\n" - + sanitize(formattedName) + + formattedName + ' ' + gauge.getValue() + "\n\n"; @@ -173,13 +173,14 @@ protected String formatGauge(String name, Gauge gauge) { protected String formatCounter(String name, Counter counter) { if (counter != null) { + String formattedName = sanitize(name); return "# HELP " - + name + + formattedName + " Counter 
metric\n" + "# TYPE " - + name + + formattedName + " counter\n" - + name + + formattedName + " " + counter.getCount() + "\n\n"; @@ -351,7 +352,7 @@ protected double nanosToSeconds(double nanos) { protected String sanitize(String name) { if (enableSanitizePrometheusMetricsName) { - return name.replaceAll("[^a-zA-Z0-9_:]", "_").toLowerCase(); + return name.replaceAll("[^a-zA-Z0-9_]", "_").toLowerCase(); } return name; } From ec0f9d75120dcb765607e8f6d53e1e4041cdefe4 Mon Sep 17 00:00:00 2001 From: Zhou JIANG Date: Mon, 25 Aug 2025 17:55:05 -0700 Subject: [PATCH 3/5] Group the new props under 'metrics' --- docs/config_properties.md | 4 ++-- .../apache/spark/k8s/operator/config/SparkOperatorConf.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/config_properties.md b/docs/config_properties.md index 94900b12..af216092 100644 --- a/docs/config_properties.md +++ b/docs/config_properties.md @@ -29,8 +29,8 @@ | spark.kubernetes.operator.metrics.clientMetricsEnabled | Boolean | true | false | Enable KubernetesClient metrics for measuring the HTTP traffic to the Kubernetes API Server. Since the metrics is collected via Okhttp interceptors, can be disabled when opt in customized interceptors. | | spark.kubernetes.operator.metrics.clientMetricsGroupByResponseCodeEnabled | Boolean | true | false | When enabled, additional metrics group by http response code group(1xx, 2xx, 3xx, 4xx, 5xx) received from API server will be added. Users can disable it when their monitoring system can combine lower level kubernetes.client.http.response.<3-digit-response-code> metrics. 
| | spark.kubernetes.operator.metrics.port | Integer | 19090 | false | The port used for checking metrics | - | spark.kubernetes.operator.enablePrometheusTextBasedFormat | Boolean | true | false | Whether or not to enable text-based format for Prometheus 2.0, as recommended by https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format | - | spark.kubernetes.operator.enableSanitizePrometheusMetricsName | Boolean | true | false | Whether or not to enable automatic name sanitizing for all metrics based on best-practice guide from Prometheus https://prometheus.io/docs/practices/naming/ | + | spark.kubernetes.operator.metrics.enablePrometheusTextBasedFormat | Boolean | true | false | Whether or not to enable text-based format for Prometheus 2.0, as recommended by https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format | + | spark.kubernetes.operator.metrics.enableSanitizePrometheusMetricsName | Boolean | true | false | Whether or not to enable automatic name sanitizing for all metrics based on best-practice guide from Prometheus https://prometheus.io/docs/practices/naming/ | | spark.kubernetes.operator.health.probePort | Integer | 19091 | false | The port used for health/readiness check probe status. | | spark.kubernetes.operator.health.sentinelExecutorPoolSize | Integer | 3 | false | Size of executor service in Sentinel Managers to check the health of sentinel resources. | | spark.kubernetes.operator.health.sentinelResourceReconciliationDelaySeconds | Integer | 60 | true | Allowed max time(seconds) between spec update and reconciliation for sentinel resources. 
| diff --git a/spark-operator/src/main/java/org/apache/spark/k8s/operator/config/SparkOperatorConf.java b/spark-operator/src/main/java/org/apache/spark/k8s/operator/config/SparkOperatorConf.java index 478369fc..0d40a702 100644 --- a/spark-operator/src/main/java/org/apache/spark/k8s/operator/config/SparkOperatorConf.java +++ b/spark-operator/src/main/java/org/apache/spark/k8s/operator/config/SparkOperatorConf.java @@ -336,7 +336,7 @@ public final class SparkOperatorConf { public static final ConfigOption ENABLE_PROMETHEUS_TEXT_BASED_FORMAT = ConfigOption.builder() - .key("spark.kubernetes.operator.enablePrometheusTextBasedFormat") + .key("spark.kubernetes.operator.metrics.enablePrometheusTextBasedFormat") .enableDynamicOverride(false) .description( "Whether or not to enable text-based format for Prometheus 2.0, as " @@ -348,7 +348,7 @@ public final class SparkOperatorConf { public static final ConfigOption ENABLE_SANITIZED_PROMETHEUS_METRICS_NAME = ConfigOption.builder() - .key("spark.kubernetes.operator.enableSanitizePrometheusMetricsName") + .key("spark.kubernetes.operator.metrics.enableSanitizePrometheusMetricsName") .enableDynamicOverride(false) .description( "Whether or not to enable automatic name sanitizing for all metrics based on " From 904ba26465e044fd811ab7e092223389679793f5 Mon Sep 17 00:00:00 2001 From: Zhou JIANG Date: Thu, 28 Aug 2025 13:34:59 -0700 Subject: [PATCH 4/5] Fix the naming convention for new properties, and use string concate as possible --- docs/config_properties.md | 4 +- .../operator/config/SparkOperatorConf.java | 8 +-- .../metrics/PrometheusPullModelHandler.java | 53 +++++++++---------- 3 files changed, 31 insertions(+), 34 deletions(-) diff --git a/docs/config_properties.md b/docs/config_properties.md index af216092..f9e2dc0c 100644 --- a/docs/config_properties.md +++ b/docs/config_properties.md @@ -29,8 +29,8 @@ | spark.kubernetes.operator.metrics.clientMetricsEnabled | Boolean | true | false | Enable KubernetesClient metrics for 
measuring the HTTP traffic to the Kubernetes API Server. Since the metrics is collected via Okhttp interceptors, can be disabled when opt in customized interceptors. | | spark.kubernetes.operator.metrics.clientMetricsGroupByResponseCodeEnabled | Boolean | true | false | When enabled, additional metrics group by http response code group(1xx, 2xx, 3xx, 4xx, 5xx) received from API server will be added. Users can disable it when their monitoring system can combine lower level kubernetes.client.http.response.<3-digit-response-code> metrics. | | spark.kubernetes.operator.metrics.port | Integer | 19090 | false | The port used for checking metrics | - | spark.kubernetes.operator.metrics.enablePrometheusTextBasedFormat | Boolean | true | false | Whether or not to enable text-based format for Prometheus 2.0, as recommended by https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format | - | spark.kubernetes.operator.metrics.enableSanitizePrometheusMetricsName | Boolean | true | false | Whether or not to enable automatic name sanitizing for all metrics based on best-practice guide from Prometheus https://prometheus.io/docs/practices/naming/ | + | spark.kubernetes.operator.metrics.prometheusTextBasedFormatEnabled | Boolean | true | false | Whether or not to enable text-based format for Prometheus 2.0, as recommended by https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format | + | spark.kubernetes.operator.metrics.sanitizePrometheusMetricsNameEnabled | Boolean | true | false | Whether or not to enable automatic name sanitizing for all metrics based on best-practice guide from Prometheus https://prometheus.io/docs/practices/naming/ | | spark.kubernetes.operator.health.probePort | Integer | 19091 | false | The port used for health/readiness check probe status. | | spark.kubernetes.operator.health.sentinelExecutorPoolSize | Integer | 3 | false | Size of executor service in Sentinel Managers to check the health of sentinel resources. 
| | spark.kubernetes.operator.health.sentinelResourceReconciliationDelaySeconds | Integer | 60 | true | Allowed max time(seconds) between spec update and reconciliation for sentinel resources. | diff --git a/spark-operator/src/main/java/org/apache/spark/k8s/operator/config/SparkOperatorConf.java b/spark-operator/src/main/java/org/apache/spark/k8s/operator/config/SparkOperatorConf.java index 0d40a702..b3b352ef 100644 --- a/spark-operator/src/main/java/org/apache/spark/k8s/operator/config/SparkOperatorConf.java +++ b/spark-operator/src/main/java/org/apache/spark/k8s/operator/config/SparkOperatorConf.java @@ -334,9 +334,9 @@ public final class SparkOperatorConf { .defaultValue(19090) .build(); - public static final ConfigOption ENABLE_PROMETHEUS_TEXT_BASED_FORMAT = + public static final ConfigOption PROMETHEUS_TEXT_BASED_FORMAT_ENABLED = ConfigOption.builder() - .key("spark.kubernetes.operator.metrics.enablePrometheusTextBasedFormat") + .key("spark.kubernetes.operator.metrics.prometheusTextBasedFormatEnabled") .enableDynamicOverride(false) .description( "Whether or not to enable text-based format for Prometheus 2.0, as " @@ -346,9 +346,9 @@ public final class SparkOperatorConf { .defaultValue(true) .build(); - public static final ConfigOption ENABLE_SANITIZED_PROMETHEUS_METRICS_NAME = + public static final ConfigOption SANITIZE_PROMETHEUS_METRICS_NAME_ENABLED = ConfigOption.builder() - .key("spark.kubernetes.operator.metrics.enableSanitizePrometheusMetricsName") + .key("spark.kubernetes.operator.metrics.sanitizePrometheusMetricsNameEnabled") .enableDynamicOverride(false) .description( "Whether or not to enable automatic name sanitizing for all metrics based on " diff --git a/spark-operator/src/main/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandler.java b/spark-operator/src/main/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandler.java index 23786e3a..7659195e 100644 --- 
a/spark-operator/src/main/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandler.java +++ b/spark-operator/src/main/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandler.java @@ -58,9 +58,9 @@ public PrometheusPullModelHandler(Properties properties, MetricRegistry registry super(properties, registry); this.registry = registry; this.enablePrometheusTextBasedFormat = - SparkOperatorConf.ENABLE_PROMETHEUS_TEXT_BASED_FORMAT.getValue(); + SparkOperatorConf.PROMETHEUS_TEXT_BASED_FORMAT_ENABLED.getValue(); this.enableSanitizePrometheusMetricsName = - SparkOperatorConf.ENABLE_SANITIZED_PROMETHEUS_METRICS_NAME.getValue(); + SparkOperatorConf.SANITIZE_PROMETHEUS_METRICS_NAME_ENABLED.getValue(); } @Override @@ -247,33 +247,30 @@ protected String formatHistogram(String name, Histogram histogram) { protected String formatMeter(String name, Meter meter) { if (meter != null) { - StringBuilder stringBuilder = new StringBuilder(200); String baseName = sanitize(name); - stringBuilder - .append("# HELP ") - .append(baseName) - .append("_total Meter count\n# TYPE ") - .append(baseName) - .append("_total counter\n") - .append(baseName) - .append("_total ") - .append(meter.getCount()) - .append("\n\n# TYPE ") - .append(baseName) - .append("_rate gauge\n") - .append(baseName) - .append("_rate{interval=\"1m\"} ") - .append(meter.getOneMinuteRate()) - .append('\n') - .append(baseName) - .append("_rate{interval=\"5m\"} ") - .append(meter.getFiveMinuteRate()) - .append('\n') - .append(baseName) - .append("_rate{interval=\"15m\"} ") - .append(meter.getFifteenMinuteRate()) - .append("\n\n"); - return stringBuilder.toString(); + return "# HELP " + + baseName + + "_total Meter count\n# TYPE " + + baseName + + "_total counter\n" + + baseName + + "_total " + + meter.getCount() + + "\n\n# TYPE " + + baseName + + "_rate gauge\n" + + baseName + + "_rate{interval=\"1m\"} " + + meter.getOneMinuteRate() + + '\n' + + baseName + + "_rate{interval=\"5m\"} " + + 
meter.getFiveMinuteRate() + + '\n' + + baseName + + "_rate{interval=\"15m\"} " + + meter.getFifteenMinuteRate() + + "\n\n"; } return null; } From a4ddc93868a4866d36d3e5932bfa12fca14fb462 Mon Sep 17 00:00:00 2001 From: Zhou JIANG Date: Thu, 28 Aug 2025 14:13:24 -0700 Subject: [PATCH 5/5] Fix histogram and timer handling to use quantile instead of le bucket --- .../metrics/PrometheusPullModelHandler.java | 212 +++++++++--------- .../PrometheusPullModelHandlerTest.java | 21 +- 2 files changed, 117 insertions(+), 116 deletions(-) diff --git a/spark-operator/src/main/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandler.java b/spark-operator/src/main/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandler.java index 7659195e..8592e185 100644 --- a/spark-operator/src/main/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandler.java +++ b/spark-operator/src/main/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandler.java @@ -190,57 +190,54 @@ protected String formatCounter(String name, Counter counter) { protected String formatHistogram(String name, Histogram histogram) { if (histogram != null && histogram.getSnapshot() != null) { - StringBuilder stringBuilder = new StringBuilder(300); String baseName = sanitize(name); Snapshot snap = histogram.getSnapshot(); long count = histogram.getCount(); - stringBuilder - .append("# HELP ") - .append(baseName) - .append(" Histogram metric\n# TYPE ") - .append(baseName) - .append(" histogram\n"); boolean isNanosHistogram = baseName.contains("nanos"); if (isNanosHistogram) { baseName = nanosMetricsNameToSeconds(baseName); } - appendBucket( - stringBuilder, - baseName, - "le=\"0.5\"", - isNanosHistogram ? nanosToSeconds(snap.getMedian()) : snap.getMean()); - appendBucket( - stringBuilder, - baseName, - "le=\"0.75\"", - isNanosHistogram ? 
nanosToSeconds(snap.get75thPercentile()) : snap.get75thPercentile()); - appendBucket( - stringBuilder, - baseName, - "le=\"0.95\"", - isNanosHistogram ? nanosToSeconds(snap.get95thPercentile()) : snap.get95thPercentile()); - appendBucket( - stringBuilder, - baseName, - "le=\"0.98\"", - isNanosHistogram ? nanosToSeconds(snap.get98thPercentile()) : snap.get98thPercentile()); - appendBucket( - stringBuilder, - baseName, - "le=\"0.99\"", - isNanosHistogram ? nanosToSeconds(snap.get99thPercentile()) : snap.get99thPercentile()); double sum = isNanosHistogram ? nanosToSeconds(snap.getMean() * count) : snap.getMean() * count; - stringBuilder - .append(baseName) - .append("_count ") - .append(count) - .append('\n') - .append(baseName) - .append("_sum ") - .append(sum) - .append("\n\n"); - return stringBuilder.toString(); + return "# HELP " + + baseName + + " Histogram metric\n# TYPE " + + baseName + + " summary\n" + + baseName + + "{quantile=\"0.5\"} " + + (isNanosHistogram ? nanosToSeconds(snap.getMedian()) : snap.getMedian()) + + "\n" + + baseName + + "{quantile=\"0.75\"} " + + (isNanosHistogram ? nanosToSeconds(snap.get75thPercentile()) : snap.get75thPercentile()) + + "\n" + + baseName + + "{quantile=\"0.95\"} " + + (isNanosHistogram ? nanosToSeconds(snap.get95thPercentile()) : snap.get95thPercentile()) + + "\n" + + baseName + + "{quantile=\"0.98\"} " + + (isNanosHistogram ? nanosToSeconds(snap.get98thPercentile()) : snap.get98thPercentile()) + + "\n" + + baseName + + "{quantile=\"0.99\"} " + + (isNanosHistogram ? nanosToSeconds(snap.get99thPercentile()) : snap.get99thPercentile()) + + "\n" + + baseName + + "{quantile=\"0.999\"} " + + (isNanosHistogram + ? 
nanosToSeconds(snap.get999thPercentile()) + : snap.get999thPercentile()) + + "\n" + + baseName + + "_count " + + count + + "\n" + + baseName + + "_sum " + + sum + + "\n\n"; } return null; } @@ -260,15 +257,15 @@ protected String formatMeter(String name, Meter meter) { + baseName + "_rate gauge\n" + baseName - + "_rate{interval=\"1m\"} " + + "_m1_rate " + meter.getOneMinuteRate() - + '\n' + + "\n" + baseName - + "_rate{interval=\"5m\"} " + + "_m5_rate " + meter.getFiveMinuteRate() - + '\n' + + "\n" + baseName - + "_rate{interval=\"15m\"} " + + "_m15_rate " + meter.getFifteenMinuteRate() + "\n\n"; } @@ -277,72 +274,75 @@ protected String formatMeter(String name, Meter meter) { protected String formatTimer(String name, Timer timer) { if (timer != null && timer.getSnapshot() != null) { - StringBuilder stringBuilder = new StringBuilder(300); String baseName = sanitize(name); Snapshot snap = timer.getSnapshot(); long count = timer.getCount(); - stringBuilder - .append("# HELP ") - .append(baseName) - .append("_duration_seconds Timer histogram\n# TYPE ") - .append(baseName) - .append("_duration_seconds histogram\n"); - appendBucket( - stringBuilder, - baseName + "_duration_seconds", - "le=\"0.5\"", - nanosToSeconds(snap.getMedian())); - appendBucket( - stringBuilder, - baseName + "_duration_seconds", - "le=\"0.75\"", - nanosToSeconds(snap.get75thPercentile())); - appendBucket( - stringBuilder, - baseName + "_duration_seconds", - "le=\"0.95\"", - nanosToSeconds(snap.get95thPercentile())); - appendBucket( - stringBuilder, - baseName + "_duration_seconds", - "le=\"0.98\"", - nanosToSeconds(snap.get98thPercentile())); - appendBucket( - stringBuilder, - baseName + "_duration_seconds", - "le=\"0.99\"", - nanosToSeconds(snap.get99thPercentile())); - stringBuilder - .append(baseName) - .append("_duration_seconds_count ") - .append(count) - .append('\n') - .append(baseName) - .append("_duration_seconds_sum ") - .append(nanosToSeconds(snap.getMean() * count)) - .append("\n\n# TYPE 
") - .append(baseName) - .append("_calls_total counter\n") - .append(baseName) - .append("_calls_total ") - .append(count) - .append("\n\n"); - return stringBuilder.toString(); + return "# HELP " + + baseName + + "_duration_seconds Timer summary\n# TYPE " + + baseName + + "_duration_seconds summary\n" + + "\n" + + baseName + + "_duration_seconds" + + "{quantile=\"0.5\"} " + + nanosToSeconds(snap.getMedian()) + + "\n" + + baseName + + "_duration_seconds" + + "{quantile=\"0.75\"} " + + nanosToSeconds(snap.get75thPercentile()) + + "\n" + + baseName + + "_duration_seconds" + + "{quantile=\"0.95\"} " + + nanosToSeconds(snap.get95thPercentile()) + + "\n" + + baseName + + "_duration_seconds" + + "{quantile=\"0.98\"} " + + nanosToSeconds(snap.get98thPercentile()) + + "\n" + + baseName + + "_duration_seconds" + + "{quantile=\"0.99\"} " + + nanosToSeconds(snap.get99thPercentile()) + + "\n" + + baseName + + "_duration_seconds" + + "{quantile=\"0.999\"} " + + nanosToSeconds(snap.get999thPercentile()) + + "\n" + + baseName + + "_duration_seconds_count " + + count + + "\n" + + baseName + + "_duration_seconds_sum " + + nanosToSeconds(snap.getMean() * count) + + "\n\n# TYPE " + + baseName + + " gauge\n" + + baseName + + "_count " + + count + + "\n" + + baseName + + "_m1_rate " + + timer.getOneMinuteRate() + + "\n" + + baseName + + "_m5_rate " + + timer.getFiveMinuteRate() + + "\n" + + baseName + + "_m15_rate " + + timer.getFifteenMinuteRate() + + "\n\n"; } return null; } - protected void appendBucket( - StringBuilder builder, String baseName, String leLabel, double value) { - builder - .append(baseName) - .append("_bucket{") - .append(leLabel) - .append("} ") - .append(value) - .append('\n'); - } - protected double nanosToSeconds(double nanos) { return nanos / 1_000_000_000.0; } diff --git a/spark-operator/src/test/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandlerTest.java 
b/spark-operator/src/test/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandlerTest.java index 406d0386..7efdc23f 100644 --- a/spark-operator/src/test/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandlerTest.java +++ b/spark-operator/src/test/java/org/apache/spark/k8s/operator/metrics/PrometheusPullModelHandlerTest.java @@ -21,6 +21,8 @@ import static org.junit.jupiter.api.Assertions.*; +import java.time.Duration; +import java.time.temporal.ChronoUnit; import java.util.Objects; import java.util.Properties; @@ -70,7 +72,7 @@ void testFormatMetricsSnapshotIncludesHistogram() throws Exception { String output = handler.formatMetricsSnapshot(); - assertTrue(output.contains("# TYPE foo_histogram histogram")); + assertTrue(output.contains("# TYPE foo_histogram summary")); assertTrue(output.contains("foo_histogram_count 2")); assertTrue(output.contains("foo_histogram_sum")); } @@ -87,7 +89,7 @@ void testFormatMetricsSnapshotIncludesHistogramWithNanos() throws Exception { String output = handler.formatMetricsSnapshot(); - assertTrue(output.contains("# TYPE foo_nanos_histogram histogram")); + assertTrue(output.contains("# TYPE foo_seconds_histogram summary")); assertTrue(output.contains("foo_seconds_histogram_count 3")); assertTrue(output.contains("foo_seconds_histogram_sum 0.001572032")); } @@ -103,7 +105,7 @@ void testFormatMetricsSnapshotIncludesMeter() throws Exception { String output = handler.formatMetricsSnapshot(); assertTrue(output.contains("# TYPE foo_meter_total counter")); assertTrue(output.contains("foo_meter_total 3")); - assertTrue(output.contains("foo_meter_rate{interval=\"1m\"}")); + assertTrue(output.contains("foo_meter_m1_rate")); } @Test @@ -111,16 +113,15 @@ void testFormatMetricsSnapshotIncludesTimer() throws Exception { MetricRegistry registry = new MetricRegistry(); Timer timer = registry.timer("foo_timer"); - Timer.Context context = timer.time(); - Thread.sleep(10); - context.stop(); - + timer.update(Duration.of(500, 
ChronoUnit.MILLIS)); + timer.update(Duration.of(1000, ChronoUnit.MILLIS)); PrometheusPullModelHandler handler = new PrometheusPullModelHandler(new Properties(), registry); String output = handler.formatMetricsSnapshot(); - assertTrue(output.contains("# TYPE foo_timer_duration_seconds histogram")); - assertTrue(output.contains("foo_timer_duration_seconds_count 1")); - assertTrue(output.contains("foo_timer_duration_seconds_sum")); + assertTrue(output.contains("# TYPE foo_timer_duration_seconds summary")); + assertTrue(output.contains("foo_timer_duration_seconds_count 2")); + assertTrue(output.contains("foo_timer_duration_seconds_sum 1.5")); + assertTrue(output.contains("foo_timer_m1_rate")); } @Test