Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions config/charts/epplib/templates/_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ data:
- type: queue-scorer
- type: kv-cache-utilization-scorer
- type: prefix-cache-scorer
{{- /* metrics-data-source plugin entry: scheme, path and TLS verification used when scraping model server metrics. */}}
- type: metrics-data-source
  parameters:
    scheme: {{ .Values.inferenceExtension.metricsDataSource.scheme | default "http" | quote }}
    path: {{ .Values.inferenceExtension.metricsDataSource.path | default "/metrics" | quote }}
    {{- /*
      Do not use `default true` here: `default` treats an explicit `false` as empty,
      which would make it impossible to turn certificate verification on.
      Only a value of false (bool or string) renders false; unset renders true.
    */}}
    insecureSkipVerify: {{ ne (toString .Values.inferenceExtension.metricsDataSource.insecureSkipVerify) "false" }}
- type: core-metrics-extractor
{{- if .Values.inferenceExtension.latencyPredictor.enabled }}
- type: predicted-latency-scorer
parameters:
Expand Down
10 changes: 10 additions & 0 deletions config/charts/inferencepool/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,16 @@ inferenceExtension:
# common latencyPredictor setting exists in config/charts/inference-extension/values.yaml
enabled: false

# Metrics DataSource Configuration
# These values configure how the EPP scrapes metrics from model server pods.
metricsDataSource:
# scheme is the HTTP scheme used to scrape metrics (http or https).
scheme: "http"
# path is the URL path on the model server pod that exposes Prometheus metrics.
path: "/metrics"
# insecureSkipVerify disables TLS certificate verification when scheme is https.
insecureSkipVerify: true

inferencePool:
targetPorts:
- number: 8000
Expand Down
10 changes: 10 additions & 0 deletions config/charts/standalone/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,16 @@ inferenceExtension:
# common latencyPredictor setting exists in config/charts/inference-extension/values.yaml
enabled: false

# Metrics DataSource Configuration
# These values configure how the EPP scrapes metrics from model server pods.
metricsDataSource:
# scheme is the HTTP scheme used to scrape metrics (http or https).
scheme: "http"
# path is the URL path on the model server pod that exposes Prometheus metrics.
path: "/metrics"
# insecureSkipVerify disables TLS certificate verification when scheme is https.
insecureSkipVerify: true

# Options: ["gke"]
provider:
name: none
Expand Down
38 changes: 10 additions & 28 deletions pkg/epp/backend/metrics/podmetrics_parity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package metrics

import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
Expand All @@ -30,7 +31,6 @@ import (
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/spf13/pflag"
"github.com/stretchr/testify/assert"

fwkdl "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/framework/interface/datalayer"
Expand Down Expand Up @@ -316,15 +316,13 @@ func parseWithDatalayerMetrics(t *testing.T, ctx context.Context, urlStr string)
return nil, fmt.Errorf("failed to parse URL: %w", err)
}

cleanup := setupTestFlags(t) // set-up test flags and restore on cleanup
defer cleanup()

// CLI flags to match the test server URL
if err := pflag.CommandLine.Set("model-server-metrics-scheme", parsedURL.Scheme); err != nil {
return nil, fmt.Errorf("failed to set scheme flag: %w", err)
}
if err := pflag.CommandLine.Set("model-server-metrics-path", parsedURL.Path); err != nil {
return nil, fmt.Errorf("failed to set path flag: %w", err)
// Pass scheme and path directly as plugin parameters — no CLI flags needed.
params, err := json.Marshal(map[string]any{
"scheme": parsedURL.Scheme,
"path": parsedURL.Path,
})
if err != nil {
return nil, fmt.Errorf("failed to marshal datasource parameters: %w", err)
}

mapping, err := metricextractor.NewMapping(
Expand All @@ -350,8 +348,8 @@ func parseWithDatalayerMetrics(t *testing.T, ctx context.Context, urlStr string)

plugin, err := sourcemetrics.MetricsDataSourceFactory(
"test-metrics-source",
nil, // use default parameters from flags
nil, // no plugin handle needed for test
params, // configure scheme and path via parameters
nil, // no plugin handle needed for test
)
if err != nil {
return nil, fmt.Errorf("failed to create data source: %w", err)
Expand Down Expand Up @@ -381,22 +379,6 @@ func parseWithDatalayerMetrics(t *testing.T, ctx context.Context, urlStr string)
return endpoint.GetMetrics(), nil
}

// setupTestFlags swaps pflag.CommandLine for a throwaway FlagSet that declares
// the model-server metrics flags, and returns a function that puts the
// original FlagSet back.
func setupTestFlags(t *testing.T) func() {
	t.Helper()

	// Capture the process-wide FlagSet up front so the returned closure can restore it.
	saved := pflag.CommandLine
	restore := func() { pflag.CommandLine = saved }

	fs := pflag.NewFlagSet("test", pflag.ContinueOnError)
	fs.String("model-server-metrics-scheme", "http", "Protocol scheme used in scraping metrics from endpoints")
	fs.String("model-server-metrics-path", "/metrics", "URL path used in scraping metrics from endpoints")
	fs.Bool("model-server-metrics-https-insecure-skip-verify", false, "Skip TLS verification for HTTPS metrics endpoints")
	pflag.CommandLine = fs

	return restore
}

// createMockServer creates an HTTP test server that serves Prometheus metrics
func createMockServer(metrics []MetricMock) *httptest.Server {
reg := prometheus.NewRegistry()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import (
)

const (
MetricsExtractorType = "model-server-protocol-metrics"
MetricsExtractorType = "core-metrics-extractor"
)

// Configuration parameters for metrics data source and extractor.
Expand Down
84 changes: 48 additions & 36 deletions pkg/epp/framework/plugins/datalayer/source/metrics/datasource.go
Copy link
Copy Markdown
Contributor

@ahg-g ahg-g Mar 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this got reverted already

Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,23 @@ import (

"github.com/prometheus/common/expfmt"
"github.com/prometheus/common/model"
"github.com/spf13/pflag"
flag "github.com/spf13/pflag"
Comment thread
Mohamedma96 marked this conversation as resolved.
Outdated

fwkplugin "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/framework/interface/plugin"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/framework/plugins/datalayer/source/http"
)

// MetricsDataSourceType is the plugin type string of the metrics data source,
// i.e. the value written as `type: metrics-data-source` in plugin configuration.
const MetricsDataSourceType = "metrics-data-source"

// Data source configuration parameters
// Default values for the metrics data source configuration.
const (
// defaultMetricsScheme is the scheme used for metrics retrieval when nothing overrides it.
defaultMetricsScheme = "http"
// defaultMetricsPath is the URL path used for metrics retrieval when nothing overrides it.
defaultMetricsPath = "/metrics"
// defaultMetricsInsecureSkipVerify is the initial InsecureSkipVerify setting; true means
// TLS certificate verification is skipped unless a flag or plugin parameter says otherwise.
defaultMetricsInsecureSkipVerify = true
)

// metricsDatasourceParams holds the configuration parameters for the metrics data source plugin.
// These values can be specified in the EndpointPickerConfig under the plugin's `parameters` field.
type metricsDatasourceParams struct {
// Scheme defines the protocol scheme used in metrics retrieval (e.g., "http").
Scheme string `json:"scheme"`
Expand Down Expand Up @@ -60,68 +68,72 @@ func MetricsDataSourceFactory(name string, parameters json.RawMessage, handle fw
name, parseMetrics, PrometheusMetricType)
}

// Names of CLI flags in main
// These flags are registered in options.go (server package) and marked as deprecated there.
// They are kept for one release cycle to give users time to migrate their configuration
// to the EndpointPickerConfig `parameters` field (metricsDatasourceParams).
// They will be removed in a future release.
//
// TODO:
//
// 1. Consider having a cli package with all flag names and constants?
// Can't use values from runserver as this creates an import cycle with datalayer.
// Given that relevant issues/PRs have been closed so may be able to remove the cycle?
// Comment from runserver package (regarding TestPodMetricsClient *backendmetrics.FakePodMetricsClient)
// This should only be used in tests. We won't need this once we do not inject metrics in the tests.
// TODO:(https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/432) Cleanup
//
// 2. Deprecation notice on these flags being moved to the configuration file
// TODO: Remove these constants and defaultDataSourceConfigParams() once the deprecated flags
// are removed from options.go.
// Note: these flag names are duplicated here (rather than imported from the server package)
// to avoid an import cycle between the datalayer plugin and the server/runserver packages.
const (
// modelServerMetricsPathFlag names the deprecated flag holding the metrics URL path.
modelServerMetricsPathFlag = "model-server-metrics-path"
// modelServerMetricsSchemeFlag names the deprecated flag holding the metrics scheme.
modelServerMetricsSchemeFlag = "model-server-metrics-scheme"
// modelServerMetricsInsecureSkipVerifyFlag names the deprecated bool flag that
// disables certificate verification for https metrics endpoints.
modelServerMetricsInsecureSkipVerifyFlag = "model-server-metrics-https-insecure-skip-verify"
)

// return the default configuration state. The defaults are populated from
// existing command line flags.
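// Data source parameter values are resolved in the following priority order (lowest → highest):
// 1. Built-in defaults (defaultMetricsScheme / defaultMetricsPath / defaultMetricsInsecureSkipVerify)
// 2. Deprecated CLI flag value (intended to apply only when the flag has been set by the operator).
//    NOTE(review): the lookup helpers in this revision return the value of any registered flag,
//    without checking whether it was explicitly set — confirm this matches the intent.
// 3. Explicit plugin `parameters` in EndpointPickerConfig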
func defaultDataSourceConfigParams() (*metricsDatasourceParams, error) {
cfg := &metricsDatasourceParams{}
cfg := &metricsDatasourceParams{
Scheme: defaultMetricsScheme,
Path: defaultMetricsPath,
InsecureSkipVerify: defaultMetricsInsecureSkipVerify,
}

scheme, err := fromStringFlag(modelServerMetricsSchemeFlag)
if err != nil {
return nil, err
if scheme, ok := fromStringFlag(modelServerMetricsSchemeFlag); ok {
cfg.Scheme = scheme
}
cfg.Scheme = scheme

path, err := fromStringFlag(modelServerMetricsPathFlag)
if err != nil {
return nil, err
if path, ok := fromStringFlag(modelServerMetricsPathFlag); ok {
cfg.Path = path
}
cfg.Path = path

insecure, err := fromBoolFlag(modelServerMetricsInsecureSkipVerifyFlag)
if err != nil {
if insecure, ok, err := fromBoolFlag(modelServerMetricsInsecureSkipVerifyFlag); err != nil {
return nil, err
} else if ok {
cfg.InsecureSkipVerify = insecure
}
cfg.InsecureSkipVerify = insecure

return cfg, nil
}

func fromStringFlag(name string) (string, error) {
f := pflag.Lookup(name)
// fromStringFlag returns the value of a registered pflag string flag.
// The second return value is false when the flag is not registered; no error is returned in that case.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: comment should be changed to reflect ... false when the flag is not registered or not explicitly set by the user?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

func fromStringFlag(name string) (string, bool) {
f := flag.Lookup(name)
if f == nil {
return "", fmt.Errorf("flag not found: %s", name)
return "", false
}
return f.Value.String(), nil
return f.Value.String(), true
}

func fromBoolFlag(name string) (bool, error) {
f := pflag.Lookup(name)
// fromBoolFlag returns the value of a registered pflag bool flag.
// The second return value is false when the flag is not registered; no error is returned in that case.
// An error is returned only when the flag exists but its value cannot be parsed as a bool.
func fromBoolFlag(name string) (bool, bool, error) {
f := flag.Lookup(name)
if f == nil {
return false, fmt.Errorf("flag not found: %s", name)
return false, false, nil
}
b, err := strconv.ParseBool(f.Value.String())
if err != nil {
return false, fmt.Errorf("invalid bool flag %q: %w", name, err)
return false, false, fmt.Errorf("invalid bool flag %q: %w", name, err)
}
return b, nil
return b, true, nil
}

func parseMetrics(data io.Reader) (any, error) {
Expand Down
3 changes: 3 additions & 0 deletions pkg/epp/server/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,13 +147,16 @@ func (opts *Options) AddFlags(fs *pflag.FlagSet) {
"Disables respecting the x-gateway-destination-endpoint-subset metadata for dispatching requests in EPP.")
fs.StringVar(&opts.ModelServerMetricsScheme, "model-server-metrics-scheme", opts.ModelServerMetricsScheme,
"Protocol scheme used in scraping metrics from endpoints.")
_ = fs.MarkDeprecated("model-server-metrics-scheme", "This flag is deprecated. Configure via EndpointPickerConfig data layer plugin parameters instead.")
fs.StringVar(&opts.ModelServerMetricsPath, "model-server-metrics-path", opts.ModelServerMetricsPath,
"URL path used in scraping metrics from endpoints.")
_ = fs.MarkDeprecated("model-server-metrics-path", "This flag is deprecated. Configure via EndpointPickerConfig data layer plugin parameters instead.")
fs.IntVar(&opts.ModelServerMetricsPort, "model-server-metrics-port", opts.ModelServerMetricsPort,
"Port to scrape metrics from endpoints. Set to the InferencePool.Spec.TargetPorts[0].Number if not defined.")
_ = fs.MarkDeprecated("model-server-metrics-port", "This flag is deprecated and will be removed in a future release.")
fs.BoolVar(&opts.ModelServerMetricsHTTPSInsecure, "model-server-metrics-https-insecure-skip-verify", opts.ModelServerMetricsHTTPSInsecure,
"Disable certificate verification when using 'https' scheme for 'model-server-metrics-scheme'.")
_ = fs.MarkDeprecated("model-server-metrics-https-insecure-skip-verify", "This flag is deprecated. Configure via EndpointPickerConfig data layer plugin parameters instead.")
fs.DurationVar(&opts.RefreshMetricsInterval, "refresh-metrics-interval", opts.RefreshMetricsInterval, "Interval to refresh metrics.")
fs.DurationVar(&opts.RefreshPrometheusMetricsInterval, "refresh-prometheus-metrics-interval", opts.RefreshPrometheusMetricsInterval,
"Interval to flush Prometheus metrics.")
Expand Down
8 changes: 4 additions & 4 deletions site-src/implementations/model-servers.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ kind: EndpointPickerConfig
featureGates:
- dataLayer
plugins:
- name: model-server-protocol-metrics
type: model-server-protocol-metrics
- name: core-metrics-extractor
type: core-metrics-extractor
parameters:
defaultEngine: "sglang" # Pods without engine label will use SGLang metrics
```
Expand All @@ -96,8 +96,8 @@ kind: EndpointPickerConfig
featureGates:
- dataLayer
plugins:
- name: model-server-protocol-metrics
type: model-server-protocol-metrics
- name: core-metrics-extractor
type: core-metrics-extractor
parameters:
engineLabelKey: "inference.networking.k8s.io/engine-type" # Pod label key (optional, this is the default)
defaultEngine: "vllm" # Which engine to use for Pods without engine label
Expand Down