Skip to content

Commit 897367d

Browse files
committed
feat: add metricsPort parameter to metrics-data-source plugin
Allow configuring a separate port for scraping model server metrics via EndpointPickerConfig plugin parameters. This addresses the gap left by the deprecation of --model-server-metrics-port (PR kubernetes-sigs#1886, kubernetes-sigs#2441) which had no replacement for the port configuration. When metricsPort is set in the metrics-data-source plugin parameters, it overrides the inference port encoded in the endpoint's MetricsHost. This enables deployments where model servers expose metrics on a separate port (e.g., vLLM with --metrics-port) from inference traffic, which is required in Istio mTLS STRICT environments. Related: kubernetes-sigs#1396, kubernetes-sigs#1556
1 parent d6bd165 commit 897367d

4 files changed

Lines changed: 58 additions & 20 deletions

File tree

pkg/epp/framework/plugins/datalayer/source/http/datasource.go

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,27 +21,32 @@ import (
2121
"crypto/tls"
2222
"fmt"
2323
"io"
24+
"net"
2425
"net/url"
2526
"reflect"
27+
"strconv"
2628

2729
fwkdl "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/framework/interface/datalayer"
2830
fwkplugin "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/framework/interface/plugin"
2931
)
3032

3133
// HTTPDataSource is a data source that receives its data using HTTP client.
3234
type HTTPDataSource struct {
33-
typedName fwkplugin.TypedName
34-
scheme string // scheme to use
35-
path string // path to use
35+
typedName fwkplugin.TypedName
36+
scheme string // scheme to use
37+
path string // path to use
38+
metricsPort int // when non-zero, overrides the port in MetricsHost for scraping
3639

3740
client Client // client (e.g. a wrapped http.Client) used to get data
3841
parser func(io.Reader) (any, error)
3942
outputType reflect.Type
4043
}
4144

4245
// NewHTTPDataSource returns a new data source, configured with
43-
// the provided scheme, path and certificate verification parameters.
44-
func NewHTTPDataSource(scheme string, path string, skipCertVerification bool, pluginType string,
46+
// the provided scheme, path, certificate verification parameters, and an optional metrics port.
47+
// When metricsPort is non-zero, it overrides the port in the endpoint's MetricsHost for scraping.
48+
// Pass 0 to use the port already encoded in MetricsHost (default behavior).
49+
func NewHTTPDataSource(scheme string, path string, skipCertVerification bool, metricsPort int, pluginType string,
4550
pluginName string, parser func(io.Reader) (any, error), outputType reflect.Type) (*HTTPDataSource, error) {
4651
if scheme != "http" && scheme != "https" {
4752
return nil, fmt.Errorf("unsupported scheme: %s", scheme)
@@ -59,11 +64,12 @@ func NewHTTPDataSource(scheme string, path string, skipCertVerification bool, pl
5964
Type: pluginType,
6065
Name: pluginName,
6166
},
62-
scheme: scheme,
63-
path: path,
64-
client: defaultClient,
65-
parser: parser,
66-
outputType: outputType,
67+
scheme: scheme,
68+
path: path,
69+
metricsPort: metricsPort,
70+
client: defaultClient,
71+
parser: parser,
72+
outputType: outputType,
6773
}
6874
return dataSrc, nil
6975
}
@@ -90,9 +96,16 @@ func (dataSrc *HTTPDataSource) Poll(ctx context.Context, ep fwkdl.Endpoint) (any
9096
}
9197

9298
func (dataSrc *HTTPDataSource) getEndpoint(ep Addressable) *url.URL {
99+
host := ep.GetMetricsHost()
100+
if dataSrc.metricsPort != 0 {
101+
ip, _, err := net.SplitHostPort(host)
102+
if err == nil {
103+
host = net.JoinHostPort(ip, strconv.Itoa(dataSrc.metricsPort))
104+
}
105+
}
93106
return &url.URL{
94107
Scheme: dataSrc.scheme,
95-
Host: ep.GetMetricsHost(),
108+
Host: host,
96109
Path: dataSrc.path,
97110
}
98111
}

pkg/epp/framework/plugins/datalayer/source/metrics/datasource.go

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,19 @@ type metricsDatasourceParams struct {
4848
Path string `json:"path"`
4949
// InsecureSkipVerify defines whether model server certificate should be verified or not.
5050
InsecureSkipVerify bool `json:"insecureSkipVerify"`
51+
// MetricsPort defines the port to use for scraping metrics from model server pods.
52+
// When set, this overrides the inference port encoded in the endpoint's MetricsHost.
53+
// Useful when the model server exposes metrics on a separate port from inference
54+
// (e.g., vLLM with --metrics-port 9090).
55+
// Defaults to 0, which means the inference port is used.
56+
MetricsPort int `json:"metricsPort"`
5157
}
5258

5359
// NewHTTPMetricsDataSource constructs a MetricsDataSource with the given scheme and path.
5460
// InsecureSkipVerify defaults to true (matching the factory default).
5561
// Use this function directly in tests to bypass JSON parameter marshaling.
5662
func NewHTTPMetricsDataSource(scheme, path, name string) (*http.HTTPDataSource, error) {
57-
return http.NewHTTPDataSource(scheme, path, defaultMetricsInsecureSkipVerify,
63+
return http.NewHTTPDataSource(scheme, path, defaultMetricsInsecureSkipVerify, 0,
5864
MetricsDataSourceType, name, parseMetrics, PrometheusMetricType)
5965
}
6066

@@ -72,7 +78,7 @@ func MetricsDataSourceFactory(name string, parameters json.RawMessage, handle fw
7278
}
7379
}
7480

75-
return http.NewHTTPDataSource(cfg.Scheme, cfg.Path, cfg.InsecureSkipVerify, MetricsDataSourceType,
81+
return http.NewHTTPDataSource(cfg.Scheme, cfg.Path, cfg.InsecureSkipVerify, cfg.MetricsPort, MetricsDataSourceType,
7682
name, parseMetrics, PrometheusMetricType)
7783
}
7884

@@ -89,6 +95,7 @@ const (
8995
modelServerMetricsPathFlag = "model-server-metrics-path"
9096
modelServerMetricsSchemeFlag = "model-server-metrics-scheme"
9197
modelServerMetricsInsecureSkipVerifyFlag = "model-server-metrics-https-insecure-skip-verify"
98+
modelServerMetricsPortFlag = "model-server-metrics-port"
9299
)
93100

94101
// DataSource parameters values - Priority (lowest → highest):
@@ -116,6 +123,10 @@ func defaultDataSourceConfigParams() (*metricsDatasourceParams, error) {
116123
cfg.InsecureSkipVerify = insecure
117124
}
118125

126+
if port, ok := fromIntFlag(modelServerMetricsPortFlag); ok {
127+
cfg.MetricsPort = port
128+
}
129+
119130
return cfg, nil
120131
}
121132

@@ -129,6 +140,20 @@ func fromStringFlag(name string) (string, bool) {
129140
return f.Value.String(), true
130141
}
131142

143+
// fromIntFlag returns the value of a registered pflag int flag.
144+
// The second return value is false when the flag is not registered or not set by the user.
145+
func fromIntFlag(name string) (int, bool) {
146+
f := pflag.Lookup(name)
147+
if f == nil || !f.Changed {
148+
return 0, false
149+
}
150+
v, err := strconv.Atoi(f.Value.String())
151+
if err != nil {
152+
return 0, false
153+
}
154+
return v, true
155+
}
156+
132157
// fromBoolFlag returns the value of a registered pflag bool flag.
133158
// The second return value is false when the flag is not registered; no error is returned in that case.
134159
// An error is returned only when the flag exists but its value cannot be parsed as a bool.

pkg/epp/framework/plugins/datalayer/source/metrics/datasource_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,11 @@ import (
2828
)
2929

3030
func TestDatasource(t *testing.T) {
31-
_, err := http.NewHTTPDataSource("invalid", "/metrics", true, MetricsDataSourceType,
31+
_, err := http.NewHTTPDataSource("invalid", "/metrics", true, 0, MetricsDataSourceType,
3232
"metrics-data-source", parseMetrics, PrometheusMetricType)
3333
assert.NotNil(t, err, "expected to fail with invalid scheme")
3434

35-
source, err := http.NewHTTPDataSource("https", "/metrics", true, MetricsDataSourceType,
35+
source, err := http.NewHTTPDataSource("https", "/metrics", true, 0, MetricsDataSourceType,
3636
"metrics-data-source", parseMetrics, PrometheusMetricType)
3737
assert.Nil(t, err, "failed to create HTTP datasource")
3838

test/integration/epp/runtime_polling_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ func TestRuntimePollingDispatch(t *testing.T) {
8686
r := datalayer.NewRuntime(pollingInterval)
8787
ext := mocks.NewPollingExtractor("test-extractor")
8888

89-
httpSrc, err := httpds.NewHTTPDataSource("http", "/metrics", true, "test-http", "test-source",
89+
httpSrc, err := httpds.NewHTTPDataSource("http", "/metrics", true, 0, "test-http", "test-source",
9090
parsePrometheusMetrics, reflect.TypeOf(fwkdl.Metrics{}))
9191
require.NoError(t, err)
9292

@@ -138,7 +138,7 @@ func TestRuntimePollingMultipleExtractors(t *testing.T) {
138138
ext1 := mocks.NewPollingExtractor("extractor-1")
139139
ext2 := mocks.NewPollingExtractor("extractor-2")
140140

141-
httpSrc, err := httpds.NewHTTPDataSource("http", "/metrics", true, "test-http", "test-source",
141+
httpSrc, err := httpds.NewHTTPDataSource("http", "/metrics", true, 0, "test-http", "test-source",
142142
parsePrometheusMetrics, reflect.TypeOf(fwkdl.Metrics{}))
143143
require.NoError(t, err)
144144

@@ -187,7 +187,7 @@ func TestRuntimePollingEndpointLifecycle(t *testing.T) {
187187
r := datalayer.NewRuntime(pollingInterval)
188188
ext := mocks.NewPollingExtractor("lifecycle-extractor")
189189

190-
httpSrc, err := httpds.NewHTTPDataSource("http", "/metrics", true, "test-http", "test-source",
190+
httpSrc, err := httpds.NewHTTPDataSource("http", "/metrics", true, 0, "test-http", "test-source",
191191
parsePrometheusMetrics, reflect.TypeOf(fwkdl.Metrics{}))
192192
require.NoError(t, err)
193193

@@ -241,7 +241,7 @@ func TestRuntimePollingWithoutExtractors(t *testing.T) {
241241

242242
r := datalayer.NewRuntime(50 * time.Millisecond)
243243

244-
httpSrc, err := httpds.NewHTTPDataSource("http", "/metrics", true, "test-http", "test-source",
244+
httpSrc, err := httpds.NewHTTPDataSource("http", "/metrics", true, 0, "test-http", "test-source",
245245
parsePrometheusMetrics, reflect.TypeOf(fwkdl.Metrics{}))
246246
require.NoError(t, err)
247247

@@ -280,7 +280,7 @@ func TestRuntimePollingHTTPError(t *testing.T) {
280280
r := datalayer.NewRuntime(pollingInterval)
281281
ext := mocks.NewPollingExtractor("error-extractor")
282282

283-
httpSrc, err := httpds.NewHTTPDataSource("http", "/metrics", true, "test-http", "test-source",
283+
httpSrc, err := httpds.NewHTTPDataSource("http", "/metrics", true, 0, "test-http", "test-source",
284284
parsePrometheusMetrics, reflect.TypeOf(fwkdl.Metrics{}))
285285
require.NoError(t, err)
286286

0 commit comments

Comments
 (0)