Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 9 additions & 27 deletions pkg/epp/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,28 +182,17 @@ var (
},
[]string{},
)
SchedulerPluginProcessingLatencies = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Subsystem: InferenceExtension,
Name: "scheduler_plugin_duration_seconds",
Help: metricsutil.HelpMsgWithStability("Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name.", compbasemetrics.ALPHA),
Buckets: []float64{
0.0001, 0.0002, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1,
},
},
[]string{"plugin_type", "plugin_name"},
)
Comment on lines -185 to -195
Copy link
Contributor

@JeffLuoo JeffLuoo Jul 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR LGTM, but we should be careful about deleting existing metrics once we promote them to Beta in the future: https://kubernetes.io/docs/reference/using-api/deprecation-policy/#deprecating-a-metric

A deprecated metric should be kept for 2 releases or 8 months. Since the current metric is still Alpha, we are fine with removing it directly. We need to track the progress of promoting metrics to Beta so that their stability is guaranteed. I will create a tracker for it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JeffLuoo what is the expectation when adding new metrics? Use Alpha for a while and then promote to Beta?
Is there also a promotion from Beta to "v1" or similar?
Additionally, how long must a metric stay in Alpha before it can be promoted to Beta?
I expect more metrics to be added as we continue to make progress, and I think clear documentation of these points would be useful (e.g., a metrics management guide).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

v1 in Kubernetes will be Stable. Hence the cycle will be Alpha -> Beta -> Stable.

There isn't a rigid requirement that a metric has to be promoted to the next stage within a fixed number of months. It is up to the project owners to determine when to promote. A higher stability level just means fewer breaking changes, so external dependencies like alerts and dashboards can rely on the metric.

+1 that a "metrics management guide" is recommended to manage the lifecycle of the metric.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JeffLuoo would you be interested in taking an action item (AI) to create this metrics management guide?


RequestControlPluginProcessingLatencies = prometheus.NewHistogramVec(
PluginProcessingLatencies = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Subsystem: InferenceExtension,
Name: "request_control_plugin_duration_seconds",
Help: metricsutil.HelpMsgWithStability("RequestControl plugin processing latency distribution in seconds for each plugin type and plugin name.", compbasemetrics.ALPHA),
Name: "plugin_duration_seconds",
Help: metricsutil.HelpMsgWithStability("Plugin processing latency distribution in seconds for each extension point, plugin type and plugin name.", compbasemetrics.ALPHA),
Buckets: []float64{
0.0001, 0.0002, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1,
},
},
[]string{"plugin_type", "plugin_name"},
[]string{"extension_point", "plugin_type", "plugin_name"},
)

// Prefix indexer Metrics
Expand Down Expand Up @@ -265,9 +254,8 @@ func Register(customCollectors ...prometheus.Collector) {
metrics.Registry.MustRegister(inferencePoolAvgKVCache)
metrics.Registry.MustRegister(inferencePoolAvgQueueSize)
metrics.Registry.MustRegister(inferencePoolReadyPods)
metrics.Registry.MustRegister(SchedulerPluginProcessingLatencies)
metrics.Registry.MustRegister(SchedulerE2ELatency)
metrics.Registry.MustRegister(RequestControlPluginProcessingLatencies)
metrics.Registry.MustRegister(PluginProcessingLatencies)
metrics.Registry.MustRegister(InferenceExtensionInfo)
metrics.Registry.MustRegister(PrefixCacheSize)
metrics.Registry.MustRegister(PrefixCacheHitRatio)
Expand All @@ -292,9 +280,8 @@ func Reset() {
inferencePoolAvgKVCache.Reset()
inferencePoolAvgQueueSize.Reset()
inferencePoolReadyPods.Reset()
SchedulerPluginProcessingLatencies.Reset()
SchedulerE2ELatency.Reset()
RequestControlPluginProcessingLatencies.Reset()
PluginProcessingLatencies.Reset()
InferenceExtensionInfo.Reset()
PrefixCacheSize.Reset()
PrefixCacheHitRatio.Reset()
Expand Down Expand Up @@ -396,19 +383,14 @@ func RecordInferencePoolReadyPods(name string, runningPods float64) {
inferencePoolReadyPods.WithLabelValues(name).Set(runningPods)
}

// RecordSchedulerPluginProcessingLatency observes, in seconds, how long a
// scheduler plugin took to run. The observation is recorded on the
// SchedulerPluginProcessingLatencies histogram under the series selected by
// the given pluginType and pluginName label values.
func RecordSchedulerPluginProcessingLatency(pluginType, pluginName string, duration time.Duration) {
	observer := SchedulerPluginProcessingLatencies.WithLabelValues(pluginType, pluginName)
	observer.Observe(duration.Seconds())
}

// RecordSchedulerE2ELatency observes the end-to-end scheduling latency, in
// seconds, on the SchedulerE2ELatency histogram. No label values are passed
// when selecting the series.
func RecordSchedulerE2ELatency(duration time.Duration) {
	observer := SchedulerE2ELatency.WithLabelValues()
	observer.Observe(duration.Seconds())
}

// RecordRequestControlPluginProcessingLatency records the processing latency for a request-control plugin.
func RecordRequestControlPluginProcessingLatency(pluginType, pluginName string, duration time.Duration) {
RequestControlPluginProcessingLatencies.WithLabelValues(pluginType, pluginName).Observe(duration.Seconds())
// RecordPluginProcessingLatency records the processing latency for a plugin.
func RecordPluginProcessingLatency(extensionPoint, pluginType, pluginName string, duration time.Duration) {
PluginProcessingLatencies.WithLabelValues(extensionPoint, pluginType, pluginName).Observe(duration.Seconds())
}

// RecordPrefixCacheSize records the size of the prefix indexer in megabytes.
Expand Down
43 changes: 24 additions & 19 deletions pkg/epp/metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -559,11 +559,12 @@ func TestInferencePoolMetrics(t *testing.T) {
}
}

func TestSchedulerPluginProcessingLatencies(t *testing.T) {
func TestPluginProcessingLatencies(t *testing.T) {
type pluginLatency struct {
pluginType string
pluginName string
duration time.Duration
extensionPoint string
pluginType string
pluginName string
duration time.Duration
}
scenarios := []struct {
name string
Expand All @@ -573,24 +574,28 @@ func TestSchedulerPluginProcessingLatencies(t *testing.T) {
name: "multiple plugins",
latencies: []pluginLatency{
{
pluginType: "PostSchedule",
pluginName: "PluginB",
duration: 200 * time.Millisecond,
extensionPoint: "ProfilePicker",
pluginType: "ProfileHandler",
pluginName: "PluginB",
duration: 200 * time.Millisecond,
},
{
pluginType: "Filter",
pluginName: "PluginC",
duration: 50 * time.Millisecond,
extensionPoint: "Filter",
pluginType: "TestFilter",
pluginName: "PluginC",
duration: 50 * time.Millisecond,
},
{
pluginType: "Scorer",
pluginName: "PluginD",
duration: 10 * time.Millisecond,
extensionPoint: "Scorer",
pluginType: "TestScorer",
pluginName: "PluginD",
duration: 10 * time.Millisecond,
},
{
pluginType: "Picker",
pluginName: "PluginE",
duration: 10 * time.Microsecond,
extensionPoint: "Picker",
pluginType: "TestPicker",
pluginName: "PluginE",
duration: 10 * time.Microsecond,
},
},
},
Expand All @@ -599,10 +604,10 @@ func TestSchedulerPluginProcessingLatencies(t *testing.T) {
for _, scenario := range scenarios {
t.Run(scenario.name, func(t *testing.T) {
for _, latency := range scenario.latencies {
RecordSchedulerPluginProcessingLatency(latency.pluginType, latency.pluginName, latency.duration)
RecordPluginProcessingLatency(latency.extensionPoint, latency.pluginType, latency.pluginName, latency.duration)
}

wantPluginLatencies, err := os.Open("testdata/scheduler_plugin_processing_latencies_metric")
wantPluginLatencies, err := os.Open("testdata/plugin_processing_latencies_metric")
defer func() {
if err := wantPluginLatencies.Close(); err != nil {
t.Error(err)
Expand All @@ -611,7 +616,7 @@ func TestSchedulerPluginProcessingLatencies(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if err := testutil.GatherAndCompare(metrics.Registry, wantPluginLatencies, "inference_extension_scheduler_plugin_duration_seconds"); err != nil {
if err := testutil.GatherAndCompare(metrics.Registry, wantPluginLatencies, "inference_extension_plugin_duration_seconds"); err != nil {
t.Error(err)
}
})
Expand Down
54 changes: 54 additions & 0 deletions pkg/epp/metrics/testdata/plugin_processing_latencies_metric
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# HELP inference_extension_plugin_duration_seconds [ALPHA] Plugin processing latency distribution in seconds for each extension point, plugin type and plugin name.
# TYPE inference_extension_plugin_duration_seconds histogram
inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.0001"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.0002"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.0005"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.001"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.002"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.005"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.01"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.02"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.05"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.1"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="+Inf"} 1
inference_extension_plugin_duration_seconds_sum{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler"} 0.2
inference_extension_plugin_duration_seconds_count{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.0001"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.0002"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.0005"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.001"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.002"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.005"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.01"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.02"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.05"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.1"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="+Inf"} 1
inference_extension_plugin_duration_seconds_sum{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter"} 0.05
inference_extension_plugin_duration_seconds_count{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.0001"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.0002"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.0005"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.001"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.002"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.005"} 0
inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.01"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.02"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.05"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.1"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="+Inf"} 1
inference_extension_plugin_duration_seconds_sum{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer"} 0.01
inference_extension_plugin_duration_seconds_count{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.0001"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.0002"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.0005"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.001"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.002"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.005"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.01"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.02"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.05"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.1"} 1
inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="+Inf"} 1
inference_extension_plugin_duration_seconds_sum{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker"} 1e-05
inference_extension_plugin_duration_seconds_count{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker"} 1
Loading