From 7a889714a7704b6d45989df57653e527a68c5e8c Mon Sep 17 00:00:00 2001 From: bobzetian Date: Fri, 16 Jan 2026 10:01:29 +0000 Subject: [PATCH 1/4] Add appProtocol for gRPC support. --- api/v1/inferencepool_types.go | 38 +++- api/v1/zz_generated.deepcopy.go | 17 +- apix/v1alpha2/inferencepool_conversion.go | 2 +- .../v1alpha2/inferencepool_conversion_test.go | 2 +- .../api/v1/endpointpickerport.go | 43 +++++ .../api/v1/endpointpickerref.go | 12 +- client-go/applyconfiguration/api/v1/port.go | 11 +- client-go/applyconfiguration/utils.go | 2 + ...ence.networking.k8s.io_inferencepools.yaml | 18 +- docs/proposals/-grpc-support/README.md | 167 ++++++++++++++++++ .../-grpc-support/images/epp_envoy_grpc.svg | 102 +++++++++++ test/cel/inferencepool_test.go | 17 +- test/cel/main_test.go | 4 - 13 files changed, 418 insertions(+), 17 deletions(-) create mode 100644 client-go/applyconfiguration/api/v1/endpointpickerport.go create mode 100644 docs/proposals/-grpc-support/README.md create mode 100644 docs/proposals/-grpc-support/images/epp_envoy_grpc.svg diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index 92e7aff14b..8d40810e37 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -76,6 +76,7 @@ type InferencePoolSpec struct { // +kubebuilder:validation:MinItems=1 // +kubebuilder:validation:MaxItems=8 // +kubebuilder:validation:XValidation:message="port number must be unique",rule="self.all(p1, self.exists_one(p2, p1.number==p2.number))" + // +kubebuilder:validation:XValidation:message="all ports must have the same AppProtocol",rule="self.all(p, (has(p.appProtocol) ? p.appProtocol : 'Unset') == (has(self[0].appProtocol) ? self[0].appProtocol : 'Unset'))" // +listType=atomic // +required TargetPorts []Port `json:"targetPorts,omitempty"` @@ -94,8 +95,34 @@ type Port struct { // // +required Number PortNumber `json:"number,omitempty"` + + // AppProtocol describes the application protocol for this port. 
+ // + // If unspecified, the protocol defaults to HTTP/1.1. + // + // Supported values include: + // * "http": HTTP/1.1. This is the default. + // * "kubernetes.io/h2c": HTTP/2 over cleartext. + // + // +kubebuilder:validation:Enum=http;"kubernetes.io/h2c" + // +optional + AppProtocol AppProtocol `json:"appProtocol,omitempty"` } +// AppProtocol describes the application protocol for a port. +type AppProtocol string + +const ( + // AppProtocolHTTP represents the HTTP/1.1 protocol. + // This is the default protocol if AppProtocol is unspecified. + AppProtocolHTTP AppProtocol = "http" + + // AppProtocolH2C represents HTTP/2 over cleartext (h2c). + // This protocol is typically used for gRPC workloads where TLS is terminated + // at the Gateway or not used within the cluster. + AppProtocolH2C AppProtocol = "kubernetes.io/h2c" +) + // EndpointPickerRef specifies a reference to an Endpoint Picker extension and its // associated configuration. // +kubebuilder:validation:XValidation:rule="self.kind != 'Service' || has(self.port)",message="port is required when kind is 'Service' or unspecified (defaults to 'Service')" @@ -136,7 +163,7 @@ type EndpointPickerRef struct { // resource or this field. // // +optional - Port *Port `json:"port,omitempty"` + Port *EndpointPickerPort `json:"port,omitempty"` // FailureMode configures how the parent handles the case when the Endpoint Picker extension // is non-responsive. When unspecified, defaults to "FailClose". @@ -146,6 +173,15 @@ type EndpointPickerRef struct { FailureMode EndpointPickerFailureMode `json:"failureMode,omitempty"` } +// EndpointPickerPort defines the network port for the Endpoint Picker extension. +type EndpointPickerPort struct { + // Number defines the port number of the Endpoint Picker service. + // The number must be in the range 1 to 65535. 
+ // + // +required + Number PortNumber `json:"number,omitempty"` +} + // EndpointPickerFailureMode defines the options for how the parent handles the case when the // Endpoint Picker extension is non-responsive. // diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 3b003632af..c7ae21f1d7 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -25,6 +25,21 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EndpointPickerPort) DeepCopyInto(out *EndpointPickerPort) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EndpointPickerPort. +func (in *EndpointPickerPort) DeepCopy() *EndpointPickerPort { + if in == nil { + return nil + } + out := new(EndpointPickerPort) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *EndpointPickerRef) DeepCopyInto(out *EndpointPickerRef) { *out = *in @@ -35,7 +50,7 @@ func (in *EndpointPickerRef) DeepCopyInto(out *EndpointPickerRef) { } if in.Port != nil { in, out := &in.Port, &out.Port - *out = new(Port) + *out = new(EndpointPickerPort) **out = **in } } diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index 01520dc4bc..9681a26486 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -254,7 +254,7 @@ func convertExtensionRefToV1(src *Extension) (v1.EndpointPickerRef, error) { } endpointPickerRef.Name = v1.ObjectName(src.Name) if src.PortNumber != nil { - endpointPickerRef.Port = ptr.To(v1.Port{Number: v1.PortNumber(*src.PortNumber)}) + endpointPickerRef.Port = ptr.To(v1.EndpointPickerPort{Number: v1.PortNumber(*src.PortNumber)}) } if src.FailureMode != nil { endpointPickerRef.FailureMode = v1.EndpointPickerFailureMode(*src.FailureMode) diff --git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go index 64b222fed2..2f378bb3e8 100644 --- a/apix/v1alpha2/inferencepool_conversion_test.go +++ b/apix/v1alpha2/inferencepool_conversion_test.go @@ -35,7 +35,7 @@ var ( v1Group = v1.Group("my-group") v1Kind = v1.Kind("MyKind") v1FailureMode = v1.EndpointPickerFailureMode("Deny") - v1Port = v1.Port{Number: 9000} + v1Port = v1.EndpointPickerPort{Number: 9000} ) func TestInferencePoolConvertTo(t *testing.T) { diff --git a/client-go/applyconfiguration/api/v1/endpointpickerport.go b/client-go/applyconfiguration/api/v1/endpointpickerport.go new file mode 100644 index 0000000000..7533e3c079 --- /dev/null +++ b/client-go/applyconfiguration/api/v1/endpointpickerport.go @@ -0,0 +1,43 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1 + +import ( + apiv1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" +) + +// EndpointPickerPortApplyConfiguration represents a declarative configuration of the EndpointPickerPort type for use +// with apply. +type EndpointPickerPortApplyConfiguration struct { + Number *apiv1.PortNumber `json:"number,omitempty"` +} + +// EndpointPickerPortApplyConfiguration constructs a declarative configuration of the EndpointPickerPort type for use with +// apply. +func EndpointPickerPort() *EndpointPickerPortApplyConfiguration { + return &EndpointPickerPortApplyConfiguration{} +} + +// WithNumber sets the Number field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Number field is set to the value of the last call. +func (b *EndpointPickerPortApplyConfiguration) WithNumber(value apiv1.PortNumber) *EndpointPickerPortApplyConfiguration { + b.Number = &value + return b +} diff --git a/client-go/applyconfiguration/api/v1/endpointpickerref.go b/client-go/applyconfiguration/api/v1/endpointpickerref.go index 0d7886239d..b237a390d6 100644 --- a/client-go/applyconfiguration/api/v1/endpointpickerref.go +++ b/client-go/applyconfiguration/api/v1/endpointpickerref.go @@ -25,11 +25,11 @@ import ( // EndpointPickerRefApplyConfiguration represents a declarative configuration of the EndpointPickerRef type for use // with apply. 
type EndpointPickerRefApplyConfiguration struct { - Group *apiv1.Group `json:"group,omitempty"` - Kind *apiv1.Kind `json:"kind,omitempty"` - Name *apiv1.ObjectName `json:"name,omitempty"` - Port *PortApplyConfiguration `json:"port,omitempty"` - FailureMode *apiv1.EndpointPickerFailureMode `json:"failureMode,omitempty"` + Group *apiv1.Group `json:"group,omitempty"` + Kind *apiv1.Kind `json:"kind,omitempty"` + Name *apiv1.ObjectName `json:"name,omitempty"` + Port *EndpointPickerPortApplyConfiguration `json:"port,omitempty"` + FailureMode *apiv1.EndpointPickerFailureMode `json:"failureMode,omitempty"` } // EndpointPickerRefApplyConfiguration constructs a declarative configuration of the EndpointPickerRef type for use with @@ -65,7 +65,7 @@ func (b *EndpointPickerRefApplyConfiguration) WithName(value apiv1.ObjectName) * // WithPort sets the Port field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the Port field is set to the value of the last call. -func (b *EndpointPickerRefApplyConfiguration) WithPort(value *PortApplyConfiguration) *EndpointPickerRefApplyConfiguration { +func (b *EndpointPickerRefApplyConfiguration) WithPort(value *EndpointPickerPortApplyConfiguration) *EndpointPickerRefApplyConfiguration { b.Port = value return b } diff --git a/client-go/applyconfiguration/api/v1/port.go b/client-go/applyconfiguration/api/v1/port.go index 6067a5d388..d31fcf7aa7 100644 --- a/client-go/applyconfiguration/api/v1/port.go +++ b/client-go/applyconfiguration/api/v1/port.go @@ -25,7 +25,8 @@ import ( // PortApplyConfiguration represents a declarative configuration of the Port type for use // with apply. 
type PortApplyConfiguration struct { - Number *apiv1.PortNumber `json:"number,omitempty"` + Number *apiv1.PortNumber `json:"number,omitempty"` + AppProtocol *apiv1.AppProtocol `json:"appProtocol,omitempty"` } // PortApplyConfiguration constructs a declarative configuration of the Port type for use with @@ -41,3 +42,11 @@ func (b *PortApplyConfiguration) WithNumber(value apiv1.PortNumber) *PortApplyCo b.Number = &value return b } + +// WithAppProtocol sets the AppProtocol field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the AppProtocol field is set to the value of the last call. +func (b *PortApplyConfiguration) WithAppProtocol(value apiv1.AppProtocol) *PortApplyConfiguration { + b.AppProtocol = &value + return b +} diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go index 93d50e54bc..e15a2ba062 100644 --- a/client-go/applyconfiguration/utils.go +++ b/client-go/applyconfiguration/utils.go @@ -36,6 +36,8 @@ import ( func ForKind(kind schema.GroupVersionKind) interface{} { switch kind { // Group=inference.networking.k8s.io, Version=v1 + case v1.SchemeGroupVersion.WithKind("EndpointPickerPort"): + return &apiv1.EndpointPickerPortApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("EndpointPickerRef"): return &apiv1.EndpointPickerRefApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("InferencePool"): diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index a3f7696333..060f349151 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -99,7 +99,7 @@ spec: properties: number: description: |- - Number defines the port number to access the selected model server Pods. 
+ Number defines the port number of the Endpoint Picker service. The number must be in the range 1 to 65535. format: int32 maximum: 65535 @@ -162,6 +162,19 @@ spec: description: Port defines the network port that will be exposed by this InferencePool. properties: + appProtocol: + description: |- + AppProtocol describes the application protocol for this port. + + If unspecified, the protocol defaults to HTTP/1.1. + + Supported values include: + * "http": HTTP/1.1. This is the default. + * "kubernetes.io/h2c": HTTP/2 over cleartext. + enum: + - http + - kubernetes.io/h2c + type: string number: description: |- Number defines the port number to access the selected model server Pods. @@ -180,6 +193,9 @@ spec: x-kubernetes-validations: - message: port number must be unique rule: self.all(p1, self.exists_one(p2, p1.number==p2.number)) + - message: all ports must have the same AppProtocol + rule: 'self.all(p, (has(p.appProtocol) ? p.appProtocol : ''Unset'') + == (has(self[0].appProtocol) ? self[0].appProtocol : ''Unset''))' required: - endpointPickerRef - selector diff --git a/docs/proposals/-grpc-support/README.md b/docs/proposals/-grpc-support/README.md new file mode 100644 index 0000000000..5901cb3971 --- /dev/null +++ b/docs/proposals/-grpc-support/README.md @@ -0,0 +1,167 @@ +# gRPC support + +Author(s): @zetxqx, @ahg-g + +For the full, detailed proposal, please see the [original proposal](https://docs.google.com/document/d/1H-WazsrSQOVi8bGgfBLuQ7RTypwa__EncVNu-yRBw1U/edit?tab=t.4i912lhthtwx#heading=h.cvvvoep0ljs9). + +## Motivation +Model servers (like vLLM [gRPC](https://github.com/vllm-project/vllm/blob/main/vllm/grpc/vllm_engine.proto) and SGLang [gRPC](https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/grpc/sglang_scheduler.proto)) now offer gRPC endpoints. 
As the gateway layer, the Inference Gateway extension needs to support gRPC, not only for compatibility with those gRPC model servers but also for the following benefits: +* gRPC Protocol Efficiency: Its binary framing is more efficient than text-based JSON, reducing payload size and parsing overhead. +* Flexibility: Gives us the flexibility to support tokenization at the GAIE level, because all of those gRPC endpoints support tokens-in, tokens-out. + +## Goal +* InferencePool API changes to support gRPC +* EPP changes to support gRPC including: + * gRPC-in, gRPC-out + * HTTP-in, gRPC-out + +## Proposed API Changes +The current InferencePool implementation defaults to HTTP/1.1 communication. To support gRPC, which operates over HTTP/2, a field must be introduced for the gateway controller to identify the appropriate appProtocol for model server communication. + +This proposal introduces an `AppProtocol` (similar to k8s [servicePort](https://github.com/kubernetes/api/blob/82d2200b6363cca3aba07c043b95d88704c2ddb3/core/v1/types.go#L6204C1-L6220C92)) field to the existing `Port` struct within the `InferencePool`. + +Additionally, to avoid confusion, the port definition used in `EndpointPickerRef` is decoupled from `InferencePoolSpec`. `EndpointPickerPort` is introduced for the endpoint picker configuration, ensuring `AppProtocol` is only associated with the `InferencePool` target ports. + +```go +// InferencePoolSpec defines the desired state of the InferencePool. +type InferencePoolSpec struct { + // ... other fields + + // ... omitted + // +kubebuilder:validation:XValidation:message="all ports must have the same AppProtocol",rule="self.all(p, (has(p.appProtocol) ? p.appProtocol : 'Unset') == (has(self[0].appProtocol) ? self[0].appProtocol : 'Unset'))" + // ... omitted + TargetPorts []Port `json:"targetPorts,omitempty"` + + // EndpointPickerRef is a reference to the Endpoint Picker extension and its + // associated configuration. 
+ // + // +required + EndpointPickerRef EndpointPickerRef `json:"endpointPickerRef,omitzero"` +} + +// Port defines the network port that will be exposed by this InferencePool. +type Port struct { + // Number defines the port number to access the selected model server Pods. + // The number must be in the range 1 to 65535. + // + // +required + Number PortNumber `json:"number,omitempty"` + + // AppProtocol describes the application protocol for this port. + // + // If unspecified, the protocol defaults to HTTP/1.1. + // + // Supported values include: + // * "http": HTTP/1.1. This is the default. + // * "kubernetes.io/h2c": HTTP/2 over cleartext. + // + // +kubebuilder:validation:Enum=http;"kubernetes.io/h2c" + // +optional + AppProtocol AppProtocol `json:"appProtocol,omitempty"` +} + +// AppProtocol describes the application protocol for a port. +type AppProtocol string + +const ( + // AppProtocolHTTP represents the HTTP/1.1 protocol. + // This is the default protocol if AppProtocol is unspecified. + AppProtocolHTTP AppProtocol = "http" + + // AppProtocolH2C represents HTTP/2 over cleartext (h2c). + // This protocol is typically used for gRPC workloads where TLS is terminated + // at the Gateway or not used within the cluster. + AppProtocolH2C AppProtocol = "kubernetes.io/h2c" +) + +// EndpointPickerRef specifies a reference to an Endpoint Picker extension and its +// associated configuration. +type EndpointPickerRef struct { + // ... Omitted for simplicity + + // Port is the port of the Endpoint Picker extension service. + // + // Port is required when the referent is a Kubernetes Service. In this + // case, the port number is the service port number, not the target port. + // For other resources, destination port might be derived from the referent + // resource or this field. + // + // +optional + Port *EndpointPickerPort `json:"port,omitempty"` + // ... Omitted for simplicity +} + +// EndpointPickerPort defines the network port for the Endpoint Picker extension. 
+type EndpointPickerPort struct { + // Number defines the port number of the Endpoint Picker service. + // The number must be in the range 1 to 65535. + // + // +required + Number PortNumber `json:"number,omitempty"` +} +``` + +## EndPointPicker (EPP) Enhancements +The current implementation of `pkg/epp/handlers/server.go` (ext_proc streaming server) is limited to handling HTTP/JSON payloads. To accommodate gRPC model servers, EPP needs to be updated to support two primary traffic patterns: + +1. **gRPC-in-gRPC-out:** Both the client and model server utilize gRPC. In this scenario, EPP primarily needs to decode gRPC protobuf messages. +2. **http-in-gRPC-out:** The client sends HTTP/JSON requests (OpenAI API), while the model server expects gRPC. This requires EPP to perform the following transcoding tasks: + 1. Transcode incoming HTTP/JSON requests to gRPC. + 2. Transcode gRPC responses back to HTTP/JSON. + 3. For streaming workloads, transcode gRPC response streams into Server-Sent Events (SSE) format. + +The diagram below outlines the proposed changes within the ext_proc streaming server sequence diagram (simplified, with some components omitted to focus on the key changes), highlighting new logic for protocol detection and transcoding in green: + +ext_proc EPP + +Specifically, the key components within the EPP codebase necessitating modification are: +* `pkg/epp/handlers/server.go`: Update the main `Process` loop to detect content type and delegate parsing. +* `pkg/epp/codec` (New Package): Implement parsers for JSON (existing logic) and gRPC (new logic). + +**More implementation details:** +1. The EPP should determine when transcoding is required for http-in, gRPC-out scenarios. This can be achieved through one of the following methods: + 1. Implementing a configuration flag or environment variable within EPP to explicitly signal the need for transcoding. + 2. **(Preferred)** EPP can inspect the observed InferencePool specification. 
If `Port.AppProtocol` is designated as `kubernetes.io/h2c`, transcoding should happen. +2. EPP needs to know how to do protocol conversion. This will be mainly based on headers diff between HTTP/JSON and gRPC. +3. A designated folder will be required to maintain copies of the vLLM and SGLang protocol buffers. To ensure production stability, a compatibility matrix will be needed for users, mapping supported GAIE versions to model server proto versions. + +## Implementation Plan +The work can be parallelized along several dimensions: +* **Support for different protocol patterns:** + * gRPC-in, gRPC-out + * HTTP-in, gRPC-out +* **Support for Key APIs** (listing most critical pathways): + * Generate (Non-Streaming) - `/chat/completion` + * Generate (Streaming) + * GetModelInfo `/v1/models` + +The initial plan prioritizes the following: +1. The initial focus will be on vLLM gRPC. Meanwhile, efforts will continue to establish a separate package for common proto definitions across model servers. +2. Implement gRPC-in, gRPC-out first, as it presents the simplest path forward. However, rapid support for HTTP-in, gRPC-out is crucial given that the majority of users rely on the OpenAI-compatible endpoint. +3. The Generate API must be prioritized among all supported APIs. + +### Gateway Implementation Requirements +* Modify the InferencePool API to incorporate an `appProtocol` field. +* Introduce conformance tests to validate gRPC support. +* Await the necessary gateway layer support for specifying the `appProtocol` within InferencePool definitions. + +### Phase 1: gRPC-to-gRPC Protocol Support +* Implement the Generate API in non-streaming mode. +* Implement the streaming mode for the Generate API. +* Provide practical examples, including EPP and vLLM gRPC manifests, and update the Helm chart as necessary. +* Conduct performance benchmarking to confirm no degradation compared to the existing HTTP support. 
+ +### Phase 2: HTTP-to-gRPC Conversion Support +* Support non-streaming `/chat/completion`. +* Support streaming `/chat/completion`. +* Update examples and the Helm chart accordingly. +* Perform benchmarking to ensure minimal performance impact relative to existing HTTP support. + +### Phase 3: Additional API Support +* Implement support for GetModelInfo and the `/v1/models` endpoint. +* Implement support for GetServerInfo (if necessary). +* Address metrics scraping specifically for gRPC endpoints (if necessary). + +### Future Plan +* Introduce support for SGLang gRPC. +* Integrate disaggregated tokenization capabilities. diff --git a/docs/proposals/-grpc-support/images/epp_envoy_grpc.svg b/docs/proposals/-grpc-support/images/epp_envoy_grpc.svg new file mode 100644 index 0000000000..25b7685ad2 --- /dev/null +++ b/docs/proposals/-grpc-support/images/epp_envoy_grpc.svg @@ -0,0 +1,102 @@ +ModelServerEPPEnvoyClientModelServerEPPEnvoyClientEPP Request ProcessingEPP Response ProcessingHTTP/JSON or gRPC Requestext_proc: RequestHeadersProtocol & Method Detection via headers: ContentType, :pathContinueext_proc: RequestBodyDecode (both HTTP/JSON and gRPC)UnmarshallSchedule DecisionTranscode (when JSON -> gRPC)BodyMutation + HeaderMutation (Routing Target)Forwarded RequestBackend Responseext_proc: ResponseHeadersHeaderMutation (if transcoding)ext_proc: ResponseBodyTranscode Response (when gRPC -> JSON)BodyMutationFinal Response \ No newline at end of file diff --git a/test/cel/inferencepool_test.go b/test/cel/inferencepool_test.go index deccb6e03f..1617cb3a7b 100644 --- a/test/cel/inferencepool_test.go +++ b/test/cel/inferencepool_test.go @@ -47,7 +47,7 @@ func TestValidateInferencePool(t *testing.T) { EndpointPickerRef: v1.EndpointPickerRef{ Name: "epp", Kind: "Service", - Port: ptrTo(v1.Port{Number: 9002}), + Port: &v1.EndpointPickerPort{Number: 9002}, }, }, } @@ -94,6 +94,21 @@ func TestValidateInferencePool(t *testing.T) { }, wantErrors: []string{"port number must be 
unique"}, }, + { + desc: "passes validation with port numbers containing same app protocol", + mutate: func(ip *v1.InferencePool) { + ip.Spec.TargetPorts = []v1.Port{{Number: 8000, AppProtocol: v1.AppProtocolH2C}, {Number: 80, AppProtocol: v1.AppProtocolH2C}, + {Number: 8080, AppProtocol: v1.AppProtocolH2C}, {Number: 443, AppProtocol: v1.AppProtocolH2C}} + }, + wantErrors: nil, + }, + { + desc: "fails validation with port numbers containing different app protocol", + mutate: func(ip *v1.InferencePool) { + ip.Spec.TargetPorts = []v1.Port{{Number: 8000}, {Number: 80, AppProtocol: v1.AppProtocolH2C}, {Number: 8080}, {Number: 443}} + }, + wantErrors: []string{"all ports must have the same AppProtocol"}, + }, } for _, tc := range testCases { diff --git a/test/cel/main_test.go b/test/cel/main_test.go index 2758a56320..7ead528182 100644 --- a/test/cel/main_test.go +++ b/test/cel/main_test.go @@ -97,10 +97,6 @@ func TestMain(m *testing.M) { os.Exit(rc) } -func ptrTo[T any](a T) *T { - return &a -} - func celErrorStringMatches(got, want string) bool { gotL := strings.ToLower(got) wantL := strings.ToLower(want) From bc41e422e9cda1b2b39db00ba01769870d25d45a Mon Sep 17 00:00:00 2001 From: bobzetian Date: Fri, 16 Jan 2026 20:49:34 +0000 Subject: [PATCH 2/4] rename the proposal folder to include pr number. 
--- docs/proposals/{-grpc-support => 2162-grpc-support}/README.md | 0 .../images/epp_envoy_grpc.svg | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename docs/proposals/{-grpc-support => 2162-grpc-support}/README.md (100%) rename docs/proposals/{-grpc-support => 2162-grpc-support}/images/epp_envoy_grpc.svg (100%) diff --git a/docs/proposals/-grpc-support/README.md b/docs/proposals/2162-grpc-support/README.md similarity index 100% rename from docs/proposals/-grpc-support/README.md rename to docs/proposals/2162-grpc-support/README.md diff --git a/docs/proposals/-grpc-support/images/epp_envoy_grpc.svg b/docs/proposals/2162-grpc-support/images/epp_envoy_grpc.svg similarity index 100% rename from docs/proposals/-grpc-support/images/epp_envoy_grpc.svg rename to docs/proposals/2162-grpc-support/images/epp_envoy_grpc.svg From dcda23d947fa27af0ef8533fc201ae02adb1578a Mon Sep 17 00:00:00 2001 From: bobzetian Date: Fri, 16 Jan 2026 22:26:39 +0000 Subject: [PATCH 3/4] move appprotocol out of Port. 
--- api/v1/inferencepool_types.go | 36 ++++------ api/v1/zz_generated.deepcopy.go | 17 +---- apix/v1alpha2/inferencepool_conversion.go | 2 +- .../v1alpha2/inferencepool_conversion_test.go | 2 +- .../api/v1/endpointpickerport.go | 43 ------------ .../api/v1/endpointpickerref.go | 12 ++-- .../api/v1/inferencepoolspec.go | 13 ++++ client-go/applyconfiguration/api/v1/port.go | 11 +--- client-go/applyconfiguration/utils.go | 2 - ...ence.networking.k8s.io_inferencepools.yaml | 31 ++++----- docs/proposals/2162-grpc-support/README.md | 65 ++----------------- test/cel/inferencepool_test.go | 24 +++---- 12 files changed, 62 insertions(+), 196 deletions(-) delete mode 100644 client-go/applyconfiguration/api/v1/endpointpickerport.go diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index 8d40810e37..07bd8c740f 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -76,11 +76,22 @@ type InferencePoolSpec struct { // +kubebuilder:validation:MinItems=1 // +kubebuilder:validation:MaxItems=8 // +kubebuilder:validation:XValidation:message="port number must be unique",rule="self.all(p1, self.exists_one(p2, p1.number==p2.number))" - // +kubebuilder:validation:XValidation:message="all ports must have the same AppProtocol",rule="self.all(p, (has(p.appProtocol) ? p.appProtocol : 'Unset') == (has(self[0].appProtocol) ? self[0].appProtocol : 'Unset'))" // +listType=atomic // +required TargetPorts []Port `json:"targetPorts,omitempty"` + // AppProtocol describes the application protocol for all the target ports. + // + // If unspecified, the protocol defaults to HTTP/1.1. + // + // Supported values include: + // * "http": HTTP/1.1. This is the default. + // * "kubernetes.io/h2c": HTTP/2 over cleartext. 
+ // + // +kubebuilder:validation:Enum=http;"kubernetes.io/h2c" + // +optional + AppProtocol AppProtocol `json:"appProtocol,omitempty"` + // EndpointPickerRef is a reference to the Endpoint Picker extension and its // associated configuration. // @@ -95,18 +106,6 @@ type Port struct { // // +required Number PortNumber `json:"number,omitempty"` - - // AppProtocol describes the application protocol for this port. - // - // If unspecified, the protocol defaults to HTTP/1.1. - // - // Supported values include: - // * "http": HTTP/1.1. This is the default. - // * "kubernetes.io/h2c": HTTP/2 over cleartext. - // - // +kubebuilder:validation:Enum=http;"kubernetes.io/h2c" - // +optional - AppProtocol AppProtocol `json:"appProtocol,omitempty"` } // AppProtocol describes the application protocol for a port. @@ -163,7 +162,7 @@ type EndpointPickerRef struct { // resource or this field. // // +optional - Port *EndpointPickerPort `json:"port,omitempty"` + Port *Port `json:"port,omitempty"` // FailureMode configures how the parent handles the case when the Endpoint Picker extension // is non-responsive. When unspecified, defaults to "FailClose". @@ -173,15 +172,6 @@ type EndpointPickerRef struct { FailureMode EndpointPickerFailureMode `json:"failureMode,omitempty"` } -// EndpointPickerPort defines the network port for the Endpoint Picker extension. -type EndpointPickerPort struct { - // Number defines the port number of the Endpoint Picker service. - // The number must be in the range 1 to 65535. - // - // +required - Number PortNumber `json:"number,omitempty"` -} - // EndpointPickerFailureMode defines the options for how the parent handles the case when the // Endpoint Picker extension is non-responsive. 
// diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index c7ae21f1d7..3b003632af 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -25,21 +25,6 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *EndpointPickerPort) DeepCopyInto(out *EndpointPickerPort) { - *out = *in -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EndpointPickerPort. -func (in *EndpointPickerPort) DeepCopy() *EndpointPickerPort { - if in == nil { - return nil - } - out := new(EndpointPickerPort) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *EndpointPickerRef) DeepCopyInto(out *EndpointPickerRef) { *out = *in @@ -50,7 +35,7 @@ func (in *EndpointPickerRef) DeepCopyInto(out *EndpointPickerRef) { } if in.Port != nil { in, out := &in.Port, &out.Port - *out = new(EndpointPickerPort) + *out = new(Port) **out = **in } } diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index 9681a26486..01520dc4bc 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -254,7 +254,7 @@ func convertExtensionRefToV1(src *Extension) (v1.EndpointPickerRef, error) { } endpointPickerRef.Name = v1.ObjectName(src.Name) if src.PortNumber != nil { - endpointPickerRef.Port = ptr.To(v1.EndpointPickerPort{Number: v1.PortNumber(*src.PortNumber)}) + endpointPickerRef.Port = ptr.To(v1.Port{Number: v1.PortNumber(*src.PortNumber)}) } if src.FailureMode != nil { endpointPickerRef.FailureMode = v1.EndpointPickerFailureMode(*src.FailureMode) diff --git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go index 2f378bb3e8..64b222fed2 100644 --- 
a/apix/v1alpha2/inferencepool_conversion_test.go +++ b/apix/v1alpha2/inferencepool_conversion_test.go @@ -35,7 +35,7 @@ var ( v1Group = v1.Group("my-group") v1Kind = v1.Kind("MyKind") v1FailureMode = v1.EndpointPickerFailureMode("Deny") - v1Port = v1.EndpointPickerPort{Number: 9000} + v1Port = v1.Port{Number: 9000} ) func TestInferencePoolConvertTo(t *testing.T) { diff --git a/client-go/applyconfiguration/api/v1/endpointpickerport.go b/client-go/applyconfiguration/api/v1/endpointpickerport.go deleted file mode 100644 index 7533e3c079..0000000000 --- a/client-go/applyconfiguration/api/v1/endpointpickerport.go +++ /dev/null @@ -1,43 +0,0 @@ -/* -Copyright The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -// Code generated by applyconfiguration-gen. DO NOT EDIT. - -package v1 - -import ( - apiv1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" -) - -// EndpointPickerPortApplyConfiguration represents a declarative configuration of the EndpointPickerPort type for use -// with apply. -type EndpointPickerPortApplyConfiguration struct { - Number *apiv1.PortNumber `json:"number,omitempty"` -} - -// EndpointPickerPortApplyConfiguration constructs a declarative configuration of the EndpointPickerPort type for use with -// apply. 
-func EndpointPickerPort() *EndpointPickerPortApplyConfiguration { - return &EndpointPickerPortApplyConfiguration{} -} - -// WithNumber sets the Number field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Number field is set to the value of the last call. -func (b *EndpointPickerPortApplyConfiguration) WithNumber(value apiv1.PortNumber) *EndpointPickerPortApplyConfiguration { - b.Number = &value - return b -} diff --git a/client-go/applyconfiguration/api/v1/endpointpickerref.go b/client-go/applyconfiguration/api/v1/endpointpickerref.go index b237a390d6..0d7886239d 100644 --- a/client-go/applyconfiguration/api/v1/endpointpickerref.go +++ b/client-go/applyconfiguration/api/v1/endpointpickerref.go @@ -25,11 +25,11 @@ import ( // EndpointPickerRefApplyConfiguration represents a declarative configuration of the EndpointPickerRef type for use // with apply. 
type EndpointPickerRefApplyConfiguration struct { - Group *apiv1.Group `json:"group,omitempty"` - Kind *apiv1.Kind `json:"kind,omitempty"` - Name *apiv1.ObjectName `json:"name,omitempty"` - Port *EndpointPickerPortApplyConfiguration `json:"port,omitempty"` - FailureMode *apiv1.EndpointPickerFailureMode `json:"failureMode,omitempty"` + Group *apiv1.Group `json:"group,omitempty"` + Kind *apiv1.Kind `json:"kind,omitempty"` + Name *apiv1.ObjectName `json:"name,omitempty"` + Port *PortApplyConfiguration `json:"port,omitempty"` + FailureMode *apiv1.EndpointPickerFailureMode `json:"failureMode,omitempty"` } // EndpointPickerRefApplyConfiguration constructs a declarative configuration of the EndpointPickerRef type for use with @@ -65,7 +65,7 @@ func (b *EndpointPickerRefApplyConfiguration) WithName(value apiv1.ObjectName) * // WithPort sets the Port field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the Port field is set to the value of the last call. -func (b *EndpointPickerRefApplyConfiguration) WithPort(value *EndpointPickerPortApplyConfiguration) *EndpointPickerRefApplyConfiguration { +func (b *EndpointPickerRefApplyConfiguration) WithPort(value *PortApplyConfiguration) *EndpointPickerRefApplyConfiguration { b.Port = value return b } diff --git a/client-go/applyconfiguration/api/v1/inferencepoolspec.go b/client-go/applyconfiguration/api/v1/inferencepoolspec.go index ca44987492..b9b258e3f6 100644 --- a/client-go/applyconfiguration/api/v1/inferencepoolspec.go +++ b/client-go/applyconfiguration/api/v1/inferencepoolspec.go @@ -18,11 +18,16 @@ limitations under the License. package v1 +import ( + apiv1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" +) + // InferencePoolSpecApplyConfiguration represents a declarative configuration of the InferencePoolSpec type for use // with apply. 
type InferencePoolSpecApplyConfiguration struct { Selector *LabelSelectorApplyConfiguration `json:"selector,omitempty"` TargetPorts []PortApplyConfiguration `json:"targetPorts,omitempty"` + AppProtocol *apiv1.AppProtocol `json:"appProtocol,omitempty"` EndpointPickerRef *EndpointPickerRefApplyConfiguration `json:"endpointPickerRef,omitempty"` } @@ -53,6 +58,14 @@ func (b *InferencePoolSpecApplyConfiguration) WithTargetPorts(values ...*PortApp return b } +// WithAppProtocol sets the AppProtocol field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the AppProtocol field is set to the value of the last call. +func (b *InferencePoolSpecApplyConfiguration) WithAppProtocol(value apiv1.AppProtocol) *InferencePoolSpecApplyConfiguration { + b.AppProtocol = &value + return b +} + // WithEndpointPickerRef sets the EndpointPickerRef field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the EndpointPickerRef field is set to the value of the last call. diff --git a/client-go/applyconfiguration/api/v1/port.go b/client-go/applyconfiguration/api/v1/port.go index d31fcf7aa7..6067a5d388 100644 --- a/client-go/applyconfiguration/api/v1/port.go +++ b/client-go/applyconfiguration/api/v1/port.go @@ -25,8 +25,7 @@ import ( // PortApplyConfiguration represents a declarative configuration of the Port type for use // with apply. 
type PortApplyConfiguration struct { - Number *apiv1.PortNumber `json:"number,omitempty"` - AppProtocol *apiv1.AppProtocol `json:"appProtocol,omitempty"` + Number *apiv1.PortNumber `json:"number,omitempty"` } // PortApplyConfiguration constructs a declarative configuration of the Port type for use with @@ -42,11 +41,3 @@ func (b *PortApplyConfiguration) WithNumber(value apiv1.PortNumber) *PortApplyCo b.Number = &value return b } - -// WithAppProtocol sets the AppProtocol field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the AppProtocol field is set to the value of the last call. -func (b *PortApplyConfiguration) WithAppProtocol(value apiv1.AppProtocol) *PortApplyConfiguration { - b.AppProtocol = &value - return b -} diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go index e15a2ba062..93d50e54bc 100644 --- a/client-go/applyconfiguration/utils.go +++ b/client-go/applyconfiguration/utils.go @@ -36,8 +36,6 @@ import ( func ForKind(kind schema.GroupVersionKind) interface{} { switch kind { // Group=inference.networking.k8s.io, Version=v1 - case v1.SchemeGroupVersion.WithKind("EndpointPickerPort"): - return &apiv1.EndpointPickerPortApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("EndpointPickerRef"): return &apiv1.EndpointPickerRefApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("InferencePool"): diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index 060f349151..40eb70c595 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -42,6 +42,19 @@ spec: spec: description: Spec defines the desired state of the InferencePool. 
properties: + appProtocol: + description: |- + AppProtocol describes the application protocol for all the target ports. + + If unspecified, the protocol defaults to HTTP/1.1. + + Supported values include: + * "http": HTTP/1.1. This is the default. + * "kubernetes.io/h2c": HTTP/2 over cleartext. + enum: + - http + - kubernetes.io/h2c + type: string endpointPickerRef: description: |- EndpointPickerRef is a reference to the Endpoint Picker extension and its @@ -99,7 +112,7 @@ spec: properties: number: description: |- - Number defines the port number of the Endpoint Picker service. + Number defines the port number to access the selected model server Pods. The number must be in the range 1 to 65535. format: int32 maximum: 65535 @@ -162,19 +175,6 @@ spec: description: Port defines the network port that will be exposed by this InferencePool. properties: - appProtocol: - description: |- - AppProtocol describes the application protocol for this port. - - If unspecified, the protocol defaults to HTTP/1.1. - - Supported values include: - * "http": HTTP/1.1. This is the default. - * "kubernetes.io/h2c": HTTP/2 over cleartext. - enum: - - http - - kubernetes.io/h2c - type: string number: description: |- Number defines the port number to access the selected model server Pods. @@ -193,9 +193,6 @@ spec: x-kubernetes-validations: - message: port number must be unique rule: self.all(p1, self.exists_one(p2, p1.number==p2.number)) - - message: all ports must have the same AppProtocol - rule: 'self.all(p, (has(p.appProtocol) ? p.appProtocol : ''Unset'') - == (has(self[0].appProtocol) ? 
self[0].appProtocol : ''Unset''))' required: - endpointPickerRef - selector diff --git a/docs/proposals/2162-grpc-support/README.md b/docs/proposals/2162-grpc-support/README.md index 5901cb3971..9406be052f 100644 --- a/docs/proposals/2162-grpc-support/README.md +++ b/docs/proposals/2162-grpc-support/README.md @@ -18,36 +18,17 @@ Model servers (like vLLM [gRPC](https://github.com/vllm-project/vllm/blob/main/v ## Proposed API Changes The current InferencePool implementation defaults to HTTP/1.1 communication. To support gRPC, which operates over HTTP/2, a field must be introduced for the gateway controller to identify the appropriate appProtocol for model server communication. -This proposal introduces an `AppProtocol` (similar to k8s [servicePort](https://github.com/kubernetes/api/blob/82d2200b6363cca3aba07c043b95d88704c2ddb3/core/v1/types.go#L6204C1-L6220C92)) field to the existing `Port` struct within the `InferencePool`. - -Additionally, to avoid confusion, the port definition used in `EndpointPickerRef` is decoupled from `InferencePoolSpec`. `EndpointPickerPort` is introduced for the endpoint picker configuration, ensuring `AppProtocol` is only associated with the `InferencePool` target ports. +This proposal introduces an `AppProtocol` (similar to k8s [servicePort](https://github.com/kubernetes/api/blob/82d2200b6363cca3aba07c043b95d88704c2ddb3/core/v1/types.go#L6204C1-L6220C92)) field to the `InferencePoolSpec` struct within the `InferencePool`. This field applies to all `TargetPorts`. ```go // InferencePoolSpec defines the desired state of the InferencePool. type InferencePoolSpec struct { // ... other fields - // ... omitted - // +kubebuilder:validation:XValidation:message="all ports must have the same AppProtocol",rule="self.all(p, (has(p.appProtocol) ? p.appProtocol : 'Unset') == (has(self[0].appProtocol) ? self[0].appProtocol : 'Unset'))" // ... 
omitted TargetPorts []Port `json:"targetPorts,omitempty"` - // EndpointPickerRef is a reference to the Endpoint Picker extension and its - // associated configuration. - // - // +required - EndpointPickerRef EndpointPickerRef `json:"endpointPickerRef,omitzero"` -} - -// Port defines the network port that will be exposed by this InferencePool. -type Port struct { - // Number defines the port number to access the selected model server Pods. - // The number must be in the range 1 to 65535. - // - // +required - Number PortNumber `json:"number,omitempty"` - - // AppProtocol describes the application protocol for this port. + // AppProtocol describes the application protocol for all the target ports. // // If unspecified, the protocol defaults to HTTP/1.1. // @@ -58,46 +39,8 @@ type Port struct { // +kubebuilder:validation:Enum=http;"kubernetes.io/h2c" // +optional AppProtocol AppProtocol `json:"appProtocol,omitempty"` -} - -// AppProtocol describes the application protocol for a port. -type AppProtocol string - -const ( - // AppProtocolHTTP represents the HTTP/1.1 protocol. - // This is the default protocol if AppProtocol is unspecified. - AppProtocolHTTP AppProtocol = "http" - // AppProtocolH2C represents HTTP/2 over cleartext (h2c). - // This protocol is typically used for gRPC workloads where TLS is terminated - // at the Gateway or not used within the cluster. - AppProtocolH2C AppProtocol = "kubernetes.io/h2c" -) - -// EndpointPickerRef specifies a reference to an Endpoint Picker extension and its -// associated configuration. -type EndpointPickerRef struct { - // ... Omitted for simplicity - - // Port is the port of the Endpoint Picker extension service. - // - // Port is required when the referent is a Kubernetes Service. In this - // case, the port number is the service port number, not the target port. - // For other resources, destination port might be derived from the referent - // resource or this field. 
- // - // +optional - Port *EndpointPickerPort `json:"port,omitempty"` - // ... Omitted for simplicity -} - -// EndpointPickerPort defines the network port for the Endpoint Picker extension. -type EndpointPickerPort struct { - // Number defines the port number of the Endpoint Picker service. - // The number must be in the range 1 to 65535. - // - // +required - Number PortNumber `json:"number,omitempty"` + // ... omitted } ``` @@ -121,7 +64,7 @@ Specifically, the key components within the EPP codebase necessitating modificat **More implementation details:** 1. The EPP should determine when transcoding is required for http-in, gRPC-out scenarios. This can be achieved through one of the following methods: 1. Implementing a configuration flag or environment variable within EPP to explicitly signal the need for transcoding. - 2. **(Preferred)** EPP can inspect the observed InferencePool specification. If `Port.AppProtocol` is designated as `kubernetes.io/h2c`, transcoding should happen. + 2. **(Preferred)** EPP can inspect the observed InferencePool specification. If `InferencePoolSpec.AppProtocol` is designated as `kubernetes.io/h2c`, transcoding should happen. 2. EPP needs to know how to do protocol conversion. This will be mainly based on headers diff between HTTP/JSON and gRPC. 3. A designated folder will be required to maintain copies of the vLLM and SGLang protocol buffers. To ensure production stability, a compatibility matrix will be needed for users, mapping supported GAIE versions to model server proto versions. 
diff --git a/test/cel/inferencepool_test.go b/test/cel/inferencepool_test.go index 1617cb3a7b..4d3a473cd0 100644 --- a/test/cel/inferencepool_test.go +++ b/test/cel/inferencepool_test.go @@ -47,7 +47,7 @@ func TestValidateInferencePool(t *testing.T) { EndpointPickerRef: v1.EndpointPickerRef{ Name: "epp", Kind: "Service", - Port: &v1.EndpointPickerPort{Number: 9002}, + Port: &v1.Port{Number: 9002}, }, }, } @@ -63,6 +63,13 @@ func TestValidateInferencePool(t *testing.T) { }, wantErrors: nil, }, + { + desc: "passes validation with a appProtocol configured", + mutate: func(ip *v1.InferencePool) { + ip.Spec.AppProtocol = v1.AppProtocolH2C + }, + wantErrors: nil, + }, { desc: "fails validation when kind is unset (defaults to Service) and port is missing", mutate: func(ip *v1.InferencePool) { @@ -94,21 +101,6 @@ func TestValidateInferencePool(t *testing.T) { }, wantErrors: []string{"port number must be unique"}, }, - { - desc: "passes validation with port numbers containing same app protocol", - mutate: func(ip *v1.InferencePool) { - ip.Spec.TargetPorts = []v1.Port{{Number: 8000, AppProtocol: v1.AppProtocolH2C}, {Number: 80, AppProtocol: v1.AppProtocolH2C}, - {Number: 8080, AppProtocol: v1.AppProtocolH2C}, {Number: 443, AppProtocol: v1.AppProtocolH2C}} - }, - wantErrors: nil, - }, - { - desc: "fails validation with port numbers containing different app protocol", - mutate: func(ip *v1.InferencePool) { - ip.Spec.TargetPorts = []v1.Port{{Number: 8000}, {Number: 80, AppProtocol: v1.AppProtocolH2C}, {Number: 8080}, {Number: 443}} - }, - wantErrors: []string{"all ports must have the same AppProtocol"}, - }, } for _, tc := range testCases { From 884647152efa70a2db3442024702c496923d7a48 Mon Sep 17 00:00:00 2001 From: bobzetian Date: Mon, 19 Jan 2026 19:13:26 +0000 Subject: [PATCH 4/4] add default. 
--- api/v1/inferencepool_types.go | 1 + config/crd/bases/inference.networking.k8s.io_inferencepools.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index 07bd8c740f..a4d2d32189 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -89,6 +89,7 @@ type InferencePoolSpec struct { // * "kubernetes.io/h2c": HTTP/2 over cleartext. // // +kubebuilder:validation:Enum=http;"kubernetes.io/h2c" + // +kubebuilder:default="http" // +optional AppProtocol AppProtocol `json:"appProtocol,omitempty"` diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index 40eb70c595..67cbdb1354 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -43,6 +43,7 @@ spec: description: Spec defines the desired state of the InferencePool. properties: appProtocol: + default: http description: |- AppProtocol describes the application protocol for all the target ports.