From 1e127acd17d9000e65793ea2b20e67e987fd42f4 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 11 Aug 2025 13:18:43 -0700 Subject: [PATCH 01/38] TargetPortNumber int32 to become TargetPorts []Port # Conflicts: # api/v1/inferencepool_types.go # api/v1/zz_generated.deepcopy.go # client-go/applyconfiguration/api/v1/inferencepoolspec.go # Conflicts: # api/v1/zz_generated.deepcopy.go # client-go/applyconfiguration/api/v1/inferencepoolspec.go --- api/v1/inferencepool_types.go | 23 +++- api/v1/zz_generated.deepcopy.go | 122 +++++++++++++----- .../api/v1/inferencepoolspec.go | 43 ++++-- client-go/applyconfiguration/api/v1/port.go | 39 ++++++ client-go/applyconfiguration/utils.go | 2 + ...ence.networking.k8s.io_inferencepools.yaml | 29 +++-- 6 files changed, 197 insertions(+), 61 deletions(-) create mode 100644 client-go/applyconfiguration/api/v1/port.go diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index d8d738c55..f11b996d8 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -62,14 +62,31 @@ type InferencePoolSpec struct { // +kubebuilder:validation:Required Selector LabelSelector `json:"selector"` - // TargetPortNumber defines the port number to access the selected model server Pods. + // TargetPorts defines the ports to access the selected model server Pods. + // + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:MaxItems=1 + TargetPorts []Port `json:"targetPorts"` + + // EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint + // picker service that picks endpoints for the requests routed to this pool. + EndpointPickerConfig `json:",inline"` +} + +type Port struct { + // PortNumber defines the port number to access the selected model server Pods. // The number must be in the range 1 to 65535. // // +kubebuilder:validation:Minimum=1 // +kubebuilder:validation:Maximum=65535 // +kubebuilder:validation:Required - TargetPortNumber int32 `json:"targetPortNumber"` + PortNumber int32 `json:"portNumber"` +} +// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint picker extension. +// This type is intended to be a union of mutually exclusive configuration options that we may add in the future. +type EndpointPickerConfig struct { // Extension configures an endpoint picker as an extension service. ExtensionRef *Extension `json:"extensionRef,omitempty"` } @@ -234,7 +251,7 @@ const ( InferencePoolReasonResolvedRefs InferencePoolReason = "ResolvedRefs" // This reason is used with the "ResolvedRefs" condition when the - // Extension is invalid in some way. This can include an unsupported kind + // ExtensionRef is invalid in some way. This can include an unsupported kind // or API group, or a reference to a resource that can not be found. InferencePoolReasonInvalidExtensionRef InferencePoolReason = "InvalidExtensionRef" ) diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 8a03dda3b..418b4b3da 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -25,8 +25,65 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EndpointPickerConfig) DeepCopyInto(out *EndpointPickerConfig) { + *out = *in + if in.ExtensionRef != nil { + in, out := &in.ExtensionRef, &out.ExtensionRef + *out = new(Extension) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EndpointPickerConfig. +func (in *EndpointPickerConfig) DeepCopy() *EndpointPickerConfig { + if in == nil { + return nil + } + out := new(EndpointPickerConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Extension) DeepCopyInto(out *Extension) { + *out = *in + in.ExtensionReference.DeepCopyInto(&out.ExtensionReference) + in.ExtensionConnection.DeepCopyInto(&out.ExtensionConnection) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Extension. +func (in *Extension) DeepCopy() *Extension { + if in == nil { + return nil + } + out := new(Extension) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ExtensionConnection) DeepCopyInto(out *ExtensionConnection) { + *out = *in + if in.FailureMode != nil { + in, out := &in.FailureMode, &out.FailureMode + *out = new(ExtensionFailureMode) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionConnection. +func (in *ExtensionConnection) DeepCopy() *ExtensionConnection { + if in == nil { + return nil + } + out := new(ExtensionConnection) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ExtensionReference) DeepCopyInto(out *ExtensionReference) { *out = *in if in.Group != nil { in, out := &in.Group, &out.Group @@ -43,19 +100,14 @@ func (in *Extension) DeepCopyInto(out *Extension) { *out = new(PortNumber) **out = **in } - if in.FailureMode != nil { - in, out := &in.FailureMode, &out.FailureMode - *out = new(ExtensionFailureMode) - **out = **in - } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Extension. -func (in *Extension) DeepCopy() *Extension { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionReference. +func (in *ExtensionReference) DeepCopy() *ExtensionReference { if in == nil { return nil } - out := new(Extension) + out := new(ExtensionReference) in.DeepCopyInto(out) return out } @@ -122,12 +174,19 @@ func (in *InferencePoolList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { *out = *in - in.Selector.DeepCopyInto(&out.Selector) - if in.ExtensionRef != nil { - in, out := &in.ExtensionRef, &out.ExtensionRef - *out = new(Extension) - (*in).DeepCopyInto(*out) + if in.Selector != nil { + in, out := &in.Selector, &out.Selector + *out = make(map[LabelKey]LabelValue, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.TargetPorts != nil { + in, out := &in.TargetPorts, &out.TargetPorts + *out = make([]Port, len(*in)) + copy(*out, *in) } + in.EndpointPickerConfig.DeepCopyInto(&out.EndpointPickerConfig) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec. @@ -162,28 +221,6 @@ func (in *InferencePoolStatus) DeepCopy() *InferencePoolStatus { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *LabelSelector) DeepCopyInto(out *LabelSelector) { - *out = *in - if in.MatchLabels != nil { - in, out := &in.MatchLabels, &out.MatchLabels - *out = make(map[LabelKey]LabelValue, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelSelector. -func (in *LabelSelector) DeepCopy() *LabelSelector { - if in == nil { - return nil - } - out := new(LabelSelector) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ParentGatewayReference) DeepCopyInto(out *ParentGatewayReference) { *out = *in @@ -236,3 +273,18 @@ func (in *PoolStatus) DeepCopy() *PoolStatus { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Port) DeepCopyInto(out *Port) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Port. +func (in *Port) DeepCopy() *Port { + if in == nil { + return nil + } + out := new(Port) + in.DeepCopyInto(out) + return out +} diff --git a/client-go/applyconfiguration/api/v1/inferencepoolspec.go b/client-go/applyconfiguration/api/v1/inferencepoolspec.go index 8d14a3969..a1c0f8402 100644 --- a/client-go/applyconfiguration/api/v1/inferencepoolspec.go +++ b/client-go/applyconfiguration/api/v1/inferencepoolspec.go @@ -18,12 +18,16 @@ limitations under the License. package v1 +import ( + apiv1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" +) + // InferencePoolSpecApplyConfiguration represents a declarative configuration of the InferencePoolSpec type for use // with apply. type InferencePoolSpecApplyConfiguration struct { - Selector *LabelSelectorApplyConfiguration `json:"selector,omitempty"` - TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` - ExtensionRef *ExtensionApplyConfiguration `json:"extensionRef,omitempty"` + Selector map[apiv1.LabelKey]apiv1.LabelValue `json:"selector,omitempty"` + TargetPorts []PortApplyConfiguration `json:"targetPorts,omitempty"` + EndpointPickerConfigApplyConfiguration `json:",inline"` } // InferencePoolSpecApplyConfiguration constructs a declarative configuration of the InferencePoolSpec type for use with @@ -32,19 +36,30 @@ func InferencePoolSpec() *InferencePoolSpecApplyConfiguration { return &InferencePoolSpecApplyConfiguration{} } -// WithSelector sets the Selector field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the Selector field is set to the value of the last call. -func (b *InferencePoolSpecApplyConfiguration) WithSelector(value *LabelSelectorApplyConfiguration) *InferencePoolSpecApplyConfiguration { - b.Selector = value +// WithSelector puts the entries into the Selector field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, the entries provided by each call will be put on the Selector field, +// overwriting an existing map entries in Selector field with the same key. +func (b *InferencePoolSpecApplyConfiguration) WithSelector(entries map[apiv1.LabelKey]apiv1.LabelValue) *InferencePoolSpecApplyConfiguration { + if b.Selector == nil && len(entries) > 0 { + b.Selector = make(map[apiv1.LabelKey]apiv1.LabelValue, len(entries)) + } + for k, v := range entries { + b.Selector[k] = v + } return b } -// WithTargetPortNumber sets the TargetPortNumber field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the TargetPortNumber field is set to the value of the last call. -func (b *InferencePoolSpecApplyConfiguration) WithTargetPortNumber(value int32) *InferencePoolSpecApplyConfiguration { - b.TargetPortNumber = &value +// WithTargetPorts adds the given value to the TargetPorts field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the TargetPorts field. +func (b *InferencePoolSpecApplyConfiguration) WithTargetPorts(values ...*PortApplyConfiguration) *InferencePoolSpecApplyConfiguration { + for i := range values { + if values[i] == nil { + panic("nil value passed to WithTargetPorts") + } + b.TargetPorts = append(b.TargetPorts, *values[i]) + } return b } @@ -52,6 +67,6 @@ func (b *InferencePoolSpecApplyConfiguration) WithTargetPortNumber(value int32) // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the ExtensionRef field is set to the value of the last call. func (b *InferencePoolSpecApplyConfiguration) WithExtensionRef(value *ExtensionApplyConfiguration) *InferencePoolSpecApplyConfiguration { - b.ExtensionRef = value + b.EndpointPickerConfigApplyConfiguration.ExtensionRef = value return b } diff --git a/client-go/applyconfiguration/api/v1/port.go b/client-go/applyconfiguration/api/v1/port.go new file mode 100644 index 000000000..b68403f63 --- /dev/null +++ b/client-go/applyconfiguration/api/v1/port.go @@ -0,0 +1,39 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1 + +// PortApplyConfiguration represents a declarative configuration of the Port type for use +// with apply. +type PortApplyConfiguration struct { + PortNumber *int32 `json:"portNumber,omitempty"` +} + +// PortApplyConfiguration constructs a declarative configuration of the Port type for use with +// apply. +func Port() *PortApplyConfiguration { + return &PortApplyConfiguration{} +} + +// WithPortNumber sets the PortNumber field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the PortNumber field is set to the value of the last call. +func (b *PortApplyConfiguration) WithPortNumber(value int32) *PortApplyConfiguration { + b.PortNumber = &value + return b +} diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go index f208bf1d9..c5d4f575e 100644 --- a/client-go/applyconfiguration/utils.go +++ b/client-go/applyconfiguration/utils.go @@ -48,6 +48,8 @@ func ForKind(kind schema.GroupVersionKind) interface{} { return &apiv1.ParentGatewayReferenceApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("PoolStatus"): return &apiv1.PoolStatusApplyConfiguration{} + case v1.SchemeGroupVersion.WithKind("Port"): + return &apiv1.PortApplyConfiguration{} // Group=inference.networking.x-k8s.io, Version=v1alpha2 case v1alpha2.SchemeGroupVersion.WithKind("Extension"): diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index 8bfc9b0b4..36d9644f2 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -131,17 +131,28 @@ spec: required: - matchLabels type: object - targetPortNumber: - description: |- - TargetPortNumber defines the port number to access the selected model server Pods. - The number must be in the range 1 to 65535. - format: int32 - maximum: 65535 - minimum: 1 - type: integer + targetPorts: + description: TargetPorts defines the ports to access the selected + model server Pods. + items: + properties: + portNumber: + description: |- + PortNumber defines the port number to access the selected model server Pods. + The number must be in the range 1 to 65535. + format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - portNumber + type: object + maxItems: 1 + minItems: 1 + type: array required: - selector - - targetPortNumber + - targetPorts type: object status: default: From 3f7d44889900bcba9c84f672088a76ce8d7ed020 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 11 Aug 2025 14:53:44 -0700 Subject: [PATCH 02/38] partially fix ut # Conflicts: # pkg/epp/controller/inferencepool_reconciler_test.go --- cmd/epp/runner/runner.go | 2 +- pkg/epp/backend/metrics/pod_metrics.go | 5 ++++- pkg/epp/backend/metrics/pod_metrics_test.go | 2 +- .../inferencepool_reconciler_test.go | 3 ++- pkg/epp/controller/pod_reconciler_test.go | 21 +++++++++++++++++++ pkg/epp/datastore/datastore_test.go | 2 +- pkg/epp/handlers/request.go | 6 +++++- .../metrics/collectors/inference_pool_test.go | 2 +- pkg/epp/requestcontrol/director.go | 5 ++++- pkg/epp/requestcontrol/director_test.go | 3 +++ pkg/epp/util/testing/wrappers.go | 4 ++-- test/utils/server.go | 2 +- 12 files changed, 46 insertions(+), 11 deletions(-) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index c614b2146..29be0bc80 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -148,7 +148,7 @@ var ( "The configuration specified as text, in lieu of a file") modelServerMetricsPort = flag.Int("model-server-metrics-port", 0, "Port to scrape metrics from pods. "+ - "Default value will be set to InferencePool.Spec.TargetPortNumber if not set.") + "Default value will be set to InferencePool.Spec.TargetPorts if not set.") modelServerMetricsPath = flag.String("model-server-metrics-path", "/metrics", "Path to scrape metrics from pods") modelServerMetricsScheme = flag.String("model-server-metrics-scheme", "http", "Scheme to scrape metrics from pods") modelServerMetricsHttpsInsecureSkipVerify = flag.Bool("model-server-metrics-https-insecure-skip-verify", true, "When using 'https' scheme for 'model-server-metrics-scheme', configure 'InsecureSkipVerify' (default to true)") diff --git a/pkg/epp/backend/metrics/pod_metrics.go b/pkg/epp/backend/metrics/pod_metrics.go index eaa77d5c9..6c7fddb7d 100644 --- a/pkg/epp/backend/metrics/pod_metrics.go +++ b/pkg/epp/backend/metrics/pod_metrics.go @@ -117,7 +117,10 @@ func (pm *podMetrics) refreshMetrics() error { } ctx, cancel := context.WithTimeout(context.Background(), fetchMetricsTimeout) defer cancel() - updated, err := pm.pmc.FetchMetrics(ctx, pm.GetPod(), pm.GetMetrics(), pool.Spec.TargetPortNumber) + if len(pool.Spec.TargetPorts) != 1 { + return fmt.Errorf("expected 1 target port, got %d", len(pool.Spec.TargetPorts)) + } + updated, err := pm.pmc.FetchMetrics(ctx, pm.GetPod(), pm.GetMetrics(), pool.Spec.TargetPorts[0].PortNumber) if err != nil { pm.logger.V(logutil.TRACE).Info("Failed to refreshed metrics:", "err", err) } diff --git a/pkg/epp/backend/metrics/pod_metrics_test.go b/pkg/epp/backend/metrics/pod_metrics_test.go index d3d8e2b45..ce1d9c3f3 100644 --- a/pkg/epp/backend/metrics/pod_metrics_test.go +++ b/pkg/epp/backend/metrics/pod_metrics_test.go @@ -88,7 +88,7 @@ func TestMetricsRefresh(t *testing.T) { type fakeDataStore struct{} func (f *fakeDataStore) PoolGet() (*v1.InferencePool, error) { - return &v1.InferencePool{Spec: v1.InferencePoolSpec{TargetPortNumber: 8000}}, nil + return &v1.InferencePool{Spec: v1.InferencePoolSpec{TargetPorts: []v1.Port{{PortNumber: 8000}}}}, nil } func (f *fakeDataStore) PodList(func(PodMetrics) bool) []PodMetrics { diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index 8a6fcc4e2..95fe53edb 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -80,6 +80,7 @@ func TestInferencePoolReconciler(t *testing.T) { pool1 := utiltest.MakeInferencePool("pool1"). Namespace("pool1-ns"). Selector(selector_v1). + TargetPorts(8080).ObjRef() ExtensionRef("epp-service"). TargetPortNumber(8080).ObjRef() pool1.SetGroupVersionKind(gvk) @@ -146,7 +147,7 @@ func TestInferencePoolReconciler(t *testing.T) { if err := fakeClient.Get(ctx, req.NamespacedName, newPool1); err != nil { t.Errorf("Unexpected pool get error: %v", err) } - newPool1.Spec.TargetPortNumber = 9090 + newPool1.Spec.TargetPorts = []v1.Port{{PortNumber: 9090}} if err := fakeClient.Update(ctx, newPool1, &client.UpdateOptions{}); err != nil { t.Errorf("Unexpected pool update error: %v", err) } diff --git a/pkg/epp/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go index d1dae0f05..5f0d50cc9 100644 --- a/pkg/epp/controller/pod_reconciler_test.go +++ b/pkg/epp/controller/pod_reconciler_test.go @@ -61,6 +61,9 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ + TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + Selector: map[v1.LabelKey]v1.LabelValue{ + "some-key": "some-val", TargetPortNumber: int32(8000), Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ @@ -79,6 +82,9 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ + TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + Selector: map[v1.LabelKey]v1.LabelValue{ + "some-key": "some-val", TargetPortNumber: int32(8000), Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ @@ -97,6 +103,9 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ + TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + Selector: map[v1.LabelKey]v1.LabelValue{ + "some-key": "some-val", TargetPortNumber: int32(8000), Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ @@ -116,6 +125,9 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ + TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + Selector: map[v1.LabelKey]v1.LabelValue{ + "some-key": "some-val", TargetPortNumber: int32(8000), Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ @@ -132,6 +144,9 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ + TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + Selector: map[v1.LabelKey]v1.LabelValue{ + "some-key": "some-val", TargetPortNumber: int32(8000), Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ @@ -149,6 +164,9 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ + TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + Selector: map[v1.LabelKey]v1.LabelValue{ + "some-key": "some-val", TargetPortNumber: int32(8000), Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ @@ -167,6 +185,9 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ + TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + Selector: map[v1.LabelKey]v1.LabelValue{ + "some-key": "some-val", TargetPortNumber: int32(8000), Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ diff --git a/pkg/epp/datastore/datastore_test.go b/pkg/epp/datastore/datastore_test.go index 77fe7c7c0..484e41413 100644 --- a/pkg/epp/datastore/datastore_test.go +++ b/pkg/epp/datastore/datastore_test.go @@ -245,7 +245,7 @@ var ( pod2NamespacedName = types.NamespacedName{Name: pod2.Name, Namespace: pod2.Namespace} inferencePool = &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPortNumber: 8000, + TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, }, } ) diff --git a/pkg/epp/handlers/request.go b/pkg/epp/handlers/request.go index d051e163d..26919356f 100644 --- a/pkg/epp/handlers/request.go +++ b/pkg/epp/handlers/request.go @@ -17,6 +17,7 @@ limitations under the License. package handlers import ( + "fmt" "strconv" "time" @@ -45,7 +46,10 @@ func (s *StreamingServer) HandleRequestHeaders(reqCtx *RequestContext, req *extP if err != nil { return err } - reqCtx.TargetEndpoint = pod.Address + ":" + strconv.Itoa(int(pool.Spec.TargetPortNumber)) + if len(pool.Spec.TargetPorts) != 1 { + return fmt.Errorf("expected 1 target port, got %d", len(pool.Spec.TargetPorts)) + } + reqCtx.TargetEndpoint = pod.Address + ":" + strconv.Itoa(int(pool.Spec.TargetPorts[0].PortNumber)) reqCtx.RequestSize = 0 reqCtx.reqHeaderResp = s.generateRequestHeaderResponse(reqCtx) return nil diff --git a/pkg/epp/metrics/collectors/inference_pool_test.go b/pkg/epp/metrics/collectors/inference_pool_test.go index e855c55c1..f1cd2f65f 100644 --- a/pkg/epp/metrics/collectors/inference_pool_test.go +++ b/pkg/epp/metrics/collectors/inference_pool_test.go @@ -80,7 +80,7 @@ func TestMetricsCollected(t *testing.T) { Name: "test-pool", }, Spec: v1.InferencePoolSpec{ - TargetPortNumber: 8000, + TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, }, } _ = ds.PoolSet(context.Background(), fakeClient, inferencePool) diff --git a/pkg/epp/requestcontrol/director.go b/pkg/epp/requestcontrol/director.go index c40b5864d..415b2d479 100644 --- a/pkg/epp/requestcontrol/director.go +++ b/pkg/epp/requestcontrol/director.go @@ -240,7 +240,10 @@ func (d *Director) prepareRequest(ctx context.Context, reqCtx *handlers.RequestC return reqCtx, err } targetPods := []*backend.Pod{} - targetPort := int(pool.Spec.TargetPortNumber) + if len(pool.Spec.TargetPorts) != 1 { + return reqCtx, errutil.Error{Code: errutil.BadRequest, Msg: "targetPorts should have length 1"} + } + targetPort := int(pool.Spec.TargetPorts[0].PortNumber) targetEndpoints := []string{} for _, pod := range result.ProfileResults[result.PrimaryProfileName].TargetPods { diff --git a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go index c25b9640d..4d17d90a2 100644 --- a/pkg/epp/requestcontrol/director_test.go +++ b/pkg/epp/requestcontrol/director_test.go @@ -101,6 +101,9 @@ func TestDirector_HandleRequest(t *testing.T) { pool := &v1.InferencePool{ ObjectMeta: metav1.ObjectMeta{Name: "test-pool", Namespace: "default"}, Spec: v1.InferencePoolSpec{ + TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + Selector: map[v1.LabelKey]v1.LabelValue{ + "app": "inference", TargetPortNumber: int32(8000), Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go index d9994024a..d145e2909 100644 --- a/pkg/epp/util/testing/wrappers.go +++ b/pkg/epp/util/testing/wrappers.go @@ -200,8 +200,8 @@ func (m *InferencePoolWrapper) Selector(selector map[string]string) *InferencePo return m } -func (m *InferencePoolWrapper) TargetPortNumber(p int32) *InferencePoolWrapper { - m.Spec.TargetPortNumber = p +func (m *InferencePoolWrapper) TargetPorts(p int32) *InferencePoolWrapper { + m.Spec.TargetPorts = []v1.Port{{PortNumber: p}} return m } diff --git a/test/utils/server.go b/test/utils/server.go index f76e147af..c28749592 100644 --- a/test/utils/server.go +++ b/test/utils/server.go @@ -71,7 +71,7 @@ func PrepareForTestStreamingServer(objectives []*v1alpha2.InferenceObjective, po WithObjects(initObjs...). Build() pool := testutil.MakeInferencePool(poolName).Namespace(namespace).ObjRef() - pool.Spec.TargetPortNumber = poolPort + pool.Spec.TargetPorts = []v1.Port{{PortNumber: poolPort}} _ = ds.PoolSet(context.Background(), fakeClient, pool) return ctx, cancel, ds, pmc From 90a7e9794a1a67775a2a0f55079c1e4f45145176 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 11 Aug 2025 15:21:48 -0700 Subject: [PATCH 03/38] change inferencepool ut temporarily --- pkg/epp/controller/inferencepool_reconciler_test.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index 95fe53edb..8cbbf2ffc 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -220,9 +220,12 @@ func TestXInferencePoolReconciler(t *testing.T) { Version: v1alpha2.GroupVersion.Version, Kind: "InferencePool", } + //TODO: change targetport to 8080 pool1 := utiltest.MakeXInferencePool("pool1"). Namespace("pool1-ns"). Selector(selector_v1). + TargetPortNumber(0).ObjRef() + pool2 := utiltest.MakeXInferencePool("pool2").Namespace("pool2-ns").ObjRef() ExtensionRef("epp-service"). TargetPortNumber(8080).ObjRef() pool2 := utiltest.MakeXInferencePool("pool2").Namespace("pool2-ns").ExtensionRef("epp-service").ObjRef() @@ -324,11 +327,10 @@ func xDiffStore(t *testing.T, datastore datastore.Datastore, params xDiffStorePa if gotPool == nil && params.wantPool == nil { return "" } - gotXPool := &v1alpha2.InferencePool{} - err := gotXPool.ConvertFrom(gotPool) + gotXPool, err := v1alpha2.ConvertFrom(gotPool) if err != nil { - t.Fatalf("failed to convert InferencePool to XInferencePool: %v", err) + t.Fatalf("failed to convert unstructured to InferencePool: %v", err) } if diff := cmp.Diff(params.wantPool, gotXPool); diff != "" { return "pool:" + diff From 922833c8c767675f7c481f9b842e0dbd5b1051b5 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 11 Aug 2025 15:37:11 -0700 Subject: [PATCH 04/38] change comments --- pkg/epp/controller/inferencepool_reconciler_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index 8cbbf2ffc..d25f275de 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -220,7 +220,7 @@ func TestXInferencePoolReconciler(t *testing.T) { Version: v1alpha2.GroupVersion.Version, Kind: "InferencePool", } - //TODO: change targetport to 8080 + // TODO: change targetport to 8080 pool1 := utiltest.MakeXInferencePool("pool1"). Namespace("pool1-ns"). Selector(selector_v1). From 55bfa1bc47457c7c68c5d529b0781d9edb6ad04f Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 11 Aug 2025 15:53:42 -0700 Subject: [PATCH 05/38] changed yaml --- config/charts/inferencepool/templates/inferencepool.yaml | 3 ++- config/manifests/inferencepool-resources.yaml | 3 ++- conformance/resources/base.yaml | 6 ++++-- site-src/api-types/inferencepool.md | 3 ++- site-src/guides/implementers.md | 3 ++- site-src/guides/inferencepool-rollout.md | 3 ++- test/testdata/inferencepool-e2e.yaml | 3 ++- test/testdata/inferencepool-with-model-hermetic.yaml | 3 ++- 8 files changed, 18 insertions(+), 9 deletions(-) diff --git a/config/charts/inferencepool/templates/inferencepool.yaml b/config/charts/inferencepool/templates/inferencepool.yaml index ecddbfaa7..d4247a51c 100644 --- a/config/charts/inferencepool/templates/inferencepool.yaml +++ b/config/charts/inferencepool/templates/inferencepool.yaml @@ -7,7 +7,8 @@ metadata: labels: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} spec: - targetPortNumber: {{ .Values.inferencePool.targetPortNumber }} + targetPorts: + - portNumber: {{ .Values.inferencePool.targetPortNumber }} selector: matchLabels: {{- if .Values.inferencePool.modelServers.matchLabels }} diff --git a/config/manifests/inferencepool-resources.yaml b/config/manifests/inferencepool-resources.yaml index 33a7b8e74..73d9f973d 100644 --- a/config/manifests/inferencepool-resources.yaml +++ b/config/manifests/inferencepool-resources.yaml @@ -8,7 +8,8 @@ kind: InferencePool metadata: name: vllm-llama3-8b-instruct spec: - targetPortNumber: 8000 + targetPorts: + - portNumber: 8000 selector: matchLabels: app: vllm-llama3-8b-instruct diff --git a/conformance/resources/base.yaml b/conformance/resources/base.yaml index e3d7561bb..f56ae5636 100644 --- a/conformance/resources/base.yaml +++ b/conformance/resources/base.yaml @@ -153,6 +153,9 @@ metadata: namespace: gateway-conformance-app-backend spec: selector: + app: primary-inference-model-server + targetPorts: + - portNumber: 3000 matchLabels: app: primary-inference-model-server targetPortNumber: 3000 @@ -247,8 +250,7 @@ metadata: namespace: gateway-conformance-app-backend spec: selector: - matchLabels: - app: secondary-inference-model-server + app: secondary-inference-model-server targetPortNumber: 3000 extensionRef: name: secondary-endpoint-picker-svc diff --git a/site-src/api-types/inferencepool.md b/site-src/api-types/inferencepool.md index d2794478b..caac3a1d4 100644 --- a/site-src/api-types/inferencepool.md +++ b/site-src/api-types/inferencepool.md @@ -33,7 +33,8 @@ kind: InferencePool metadata: name: vllm-llama3-8b-instruct spec: - targetPortNumber: 8000 + targetPorts: + - portNumber: 8000 selector: app: vllm-llama3-8b-instruct extensionRef: diff --git a/site-src/guides/implementers.md b/site-src/guides/implementers.md index 747e934a2..b7358ce4f 100644 --- a/site-src/guides/implementers.md +++ b/site-src/guides/implementers.md @@ -47,7 +47,8 @@ kind: InferencePool metadata: name: vllm-llama3-8b-instruct spec: - targetPortNumber: 8000 + targetPorts: + - portNumber: 8000 selector: app: vllm-llama3-8b-instruct extensionRef: diff --git a/site-src/guides/inferencepool-rollout.md b/site-src/guides/inferencepool-rollout.md index 80a6e3bf6..aad91d1ec 100644 --- a/site-src/guides/inferencepool-rollout.md +++ b/site-src/guides/inferencepool-rollout.md @@ -208,7 +208,8 @@ kind: InferencePool metadata: name: vllm-llama3-8b-instruct-new spec: - targetPortNumber: 8000 + targetPorts: + - portNumber: 8000 selector: app: vllm-llama3-8b-instruct-new extensionRef: diff --git a/test/testdata/inferencepool-e2e.yaml b/test/testdata/inferencepool-e2e.yaml index aec574a39..411b1fb25 100644 --- a/test/testdata/inferencepool-e2e.yaml +++ b/test/testdata/inferencepool-e2e.yaml @@ -4,7 +4,8 @@ metadata: labels: name: vllm-llama3-8b-instruct spec: - targetPortNumber: 8000 + targetPorts: + - portNumber: 8000 selector: matchLabels: app: vllm-llama3-8b-instruct diff --git a/test/testdata/inferencepool-with-model-hermetic.yaml b/test/testdata/inferencepool-with-model-hermetic.yaml index 707a76eed..1df1e06fc 100644 --- a/test/testdata/inferencepool-with-model-hermetic.yaml +++ b/test/testdata/inferencepool-with-model-hermetic.yaml @@ -4,7 +4,8 @@ metadata: name: vllm-llama3-8b-instruct-pool namespace: default spec: - targetPortNumber: 8000 + targetPorts: + - portNumber: 8000 selector: matchLabels: app: vllm-llama3-8b-instruct-pool From 67eefa23f952ef6f30efff6debd7b6154ef5091a Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 11 Aug 2025 16:13:01 -0700 Subject: [PATCH 06/38] added temporary fix --- pkg/epp/controller/inferencepool_reconciler_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index d25f275de..e4e151e54 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -220,7 +220,7 @@ func TestXInferencePoolReconciler(t *testing.T) { Version: v1alpha2.GroupVersion.Version, Kind: "InferencePool", } - // TODO: change targetport to 8080 + // TODO: change it to 8080 pool1 := utiltest.MakeXInferencePool("pool1"). Namespace("pool1-ns"). Selector(selector_v1). @@ -290,7 +290,8 @@ func TestXInferencePoolReconciler(t *testing.T) { if err := fakeClient.Get(ctx, req.NamespacedName, newPool1); err != nil { t.Errorf("Unexpected pool get error: %v", err) } - newPool1.Spec.TargetPortNumber = 9090 + // TODO: change it later to 9090 + newPool1.Spec.TargetPortNumber = 0 if err := fakeClient.Update(ctx, newPool1, &client.UpdateOptions{}); err != nil { t.Errorf("Unexpected pool update error: %v", err) } From 7524401fa95b77e55fd74833990dd08838f8ff47 Mon Sep 17 00:00:00 2001 From: capri-xiyue <52932582+capri-xiyue@users.noreply.github.com> Date: Mon, 11 Aug 2025 16:27:58 -0700 Subject: [PATCH 07/38] Update api/v1/inferencepool_types.go Co-authored-by: Rob Scott --- api/v1/inferencepool_types.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index f11b996d8..cac4195cb 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -81,7 +81,7 @@ type Port struct { // +kubebuilder:validation:Minimum=1 // +kubebuilder:validation:Maximum=65535 // +kubebuilder:validation:Required - PortNumber int32 `json:"portNumber"` + Number int32 `json:"number"` } // EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint picker extension. From cfb2526883c0bee00c3dc808dcf4d4f369929290 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 11 Aug 2025 16:39:15 -0700 Subject: [PATCH 08/38] fixed ut and e2e to adopt naming change --- client-go/applyconfiguration/api/v1/port.go | 10 +++++----- .../inferencepool/templates/inferencepool.yaml | 2 +- ...inference.networking.k8s.io_inferencepools.yaml | 4 ++-- config/manifests/inferencepool-resources.yaml | 2 +- conformance/resources/base.yaml | 4 +++- pkg/epp/backend/metrics/pod_metrics.go | 2 +- pkg/epp/backend/metrics/pod_metrics_test.go | 2 +- .../controller/inferencepool_reconciler_test.go | 2 +- pkg/epp/controller/pod_reconciler_test.go | 14 +++++++------- pkg/epp/datastore/datastore_test.go | 2 +- pkg/epp/handlers/request.go | 2 +- pkg/epp/metrics/collectors/inference_pool_test.go | 2 +- pkg/epp/requestcontrol/director.go | 2 +- pkg/epp/requestcontrol/director_test.go | 2 +- pkg/epp/util/testing/wrappers.go | 2 +- site-src/api-types/inferencepool.md | 2 +- site-src/guides/implementers.md | 2 +- test/testdata/inferencepool-e2e.yaml | 2 +- .../inferencepool-with-model-hermetic.yaml | 2 +- test/utils/server.go | 2 +- 20 files changed, 33 insertions(+), 31 deletions(-) diff --git a/client-go/applyconfiguration/api/v1/port.go b/client-go/applyconfiguration/api/v1/port.go index b68403f63..266244d0c 100644 --- a/client-go/applyconfiguration/api/v1/port.go +++ b/client-go/applyconfiguration/api/v1/port.go @@ -21,7 +21,7 @@ package v1 // PortApplyConfiguration represents a declarative configuration of the Port type for use // with apply. type PortApplyConfiguration struct { - PortNumber *int32 `json:"portNumber,omitempty"` + Number *int32 `json:"number,omitempty"` } // PortApplyConfiguration constructs a declarative configuration of the Port type for use with @@ -30,10 +30,10 @@ func Port() *PortApplyConfiguration { return &PortApplyConfiguration{} } -// WithPortNumber sets the PortNumber field in the declarative configuration to the given value +// WithNumber sets the Number field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the PortNumber field is set to the value of the last call. -func (b *PortApplyConfiguration) WithPortNumber(value int32) *PortApplyConfiguration { - b.PortNumber = &value +// If called multiple times, the Number field is set to the value of the last call. +func (b *PortApplyConfiguration) WithNumber(value int32) *PortApplyConfiguration { + b.Number = &value return b } diff --git a/config/charts/inferencepool/templates/inferencepool.yaml b/config/charts/inferencepool/templates/inferencepool.yaml index d4247a51c..fbce19cf0 100644 --- a/config/charts/inferencepool/templates/inferencepool.yaml +++ b/config/charts/inferencepool/templates/inferencepool.yaml @@ -8,7 +8,7 @@ metadata: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} spec: targetPorts: - - portNumber: {{ .Values.inferencePool.targetPortNumber }} + - number: {{ .Values.inferencePool.targetPortNumber }} selector: matchLabels: {{- if .Values.inferencePool.modelServers.matchLabels }} diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index 36d9644f2..784302c9a 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -136,7 +136,7 @@ spec: model server Pods. items: properties: - portNumber: + number: description: |- PortNumber defines the port number to access the selected model server Pods. The number must be in the range 1 to 65535. @@ -145,7 +145,7 @@ spec: minimum: 1 type: integer required: - - portNumber + - number type: object maxItems: 1 minItems: 1 diff --git a/config/manifests/inferencepool-resources.yaml b/config/manifests/inferencepool-resources.yaml index 73d9f973d..a3eba5582 100644 --- a/config/manifests/inferencepool-resources.yaml +++ b/config/manifests/inferencepool-resources.yaml @@ -9,7 +9,7 @@ metadata: name: vllm-llama3-8b-instruct spec: targetPorts: - - portNumber: 8000 + - number: 8000 selector: matchLabels: app: vllm-llama3-8b-instruct diff --git a/conformance/resources/base.yaml b/conformance/resources/base.yaml index f56ae5636..015e89557 100644 --- a/conformance/resources/base.yaml +++ b/conformance/resources/base.yaml @@ -159,6 +159,7 @@ spec: matchLabels: app: primary-inference-model-server targetPortNumber: 3000 + - number: 3000 extensionRef: name: primary-endpoint-picker-svc --- @@ -251,7 +252,8 @@ metadata: spec: selector: app: secondary-inference-model-server - targetPortNumber: 3000 + targetPorts: + - portNumber: 3000 extensionRef: name: secondary-endpoint-picker-svc failureMode: FailOpen diff --git a/pkg/epp/backend/metrics/pod_metrics.go b/pkg/epp/backend/metrics/pod_metrics.go index 6c7fddb7d..8a1a72b9f 100644 --- a/pkg/epp/backend/metrics/pod_metrics.go +++ b/pkg/epp/backend/metrics/pod_metrics.go @@ -120,7 +120,7 @@ func (pm *podMetrics) refreshMetrics() error { if len(pool.Spec.TargetPorts) != 1 { return fmt.Errorf("expected 1 target port, got %d", len(pool.Spec.TargetPorts)) } - updated, err := pm.pmc.FetchMetrics(ctx, pm.GetPod(), pm.GetMetrics(), pool.Spec.TargetPorts[0].PortNumber) + updated, err := pm.pmc.FetchMetrics(ctx, pm.GetPod(), pm.GetMetrics(), pool.Spec.TargetPorts[0].Number) if err != nil { pm.logger.V(logutil.TRACE).Info("Failed to refreshed metrics:", "err", err) } diff --git a/pkg/epp/backend/metrics/pod_metrics_test.go b/pkg/epp/backend/metrics/pod_metrics_test.go index ce1d9c3f3..9a0e1a6fc 100644 --- a/pkg/epp/backend/metrics/pod_metrics_test.go +++ b/pkg/epp/backend/metrics/pod_metrics_test.go @@ -88,7 +88,7 @@ func TestMetricsRefresh(t *testing.T) { type fakeDataStore struct{} func (f *fakeDataStore) PoolGet() (*v1.InferencePool, error) { - return &v1.InferencePool{Spec: v1.InferencePoolSpec{TargetPorts: []v1.Port{{PortNumber: 8000}}}}, nil + return &v1.InferencePool{Spec: v1.InferencePoolSpec{TargetPorts: []v1.Port{{Number: 8000}}}}, nil } func (f *fakeDataStore) PodList(func(PodMetrics) bool) []PodMetrics { diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index e4e151e54..2c66a3c26 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -147,7 +147,7 @@ func TestInferencePoolReconciler(t *testing.T) { if err := fakeClient.Get(ctx, req.NamespacedName, newPool1); err != nil { t.Errorf("Unexpected pool get error: %v", err) } - newPool1.Spec.TargetPorts = []v1.Port{{PortNumber: 9090}} + newPool1.Spec.TargetPorts = []v1.Port{{Number: 9090}} if err := fakeClient.Update(ctx, newPool1, &client.UpdateOptions{}); err != nil { t.Errorf("Unexpected pool update error: %v", err) } diff --git a/pkg/epp/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go index 5f0d50cc9..7c3a2f3d6 100644 --- a/pkg/epp/controller/pod_reconciler_test.go +++ b/pkg/epp/controller/pod_reconciler_test.go @@ -61,7 +61,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + TargetPorts: []v1.Port{{Number: int32(8000)}}, Selector: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", TargetPortNumber: int32(8000), @@ -82,7 +82,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + TargetPorts: []v1.Port{{Number: int32(8000)}}, Selector: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", TargetPortNumber: int32(8000), @@ -103,7 +103,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + TargetPorts: []v1.Port{{Number: int32(8000)}}, Selector: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", TargetPortNumber: int32(8000), @@ -125,7 +125,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + TargetPorts: []v1.Port{{Number: int32(8000)}}, Selector: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", TargetPortNumber: int32(8000), @@ -144,7 +144,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + TargetPorts: []v1.Port{{Number: int32(8000)}}, Selector: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", TargetPortNumber: int32(8000), @@ -164,7 +164,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + TargetPorts: []v1.Port{{Number: int32(8000)}}, Selector: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", TargetPortNumber: int32(8000), @@ -185,7 +185,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + TargetPorts: []v1.Port{{Number: int32(8000)}}, Selector: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", TargetPortNumber: int32(8000), diff --git a/pkg/epp/datastore/datastore_test.go b/pkg/epp/datastore/datastore_test.go index 484e41413..dc60458b6 100644 --- a/pkg/epp/datastore/datastore_test.go +++ b/pkg/epp/datastore/datastore_test.go @@ -245,7 +245,7 @@ var ( pod2NamespacedName = types.NamespacedName{Name: pod2.Name, Namespace: pod2.Namespace} inferencePool = &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + TargetPorts: []v1.Port{{Number: int32(8000)}}, }, } ) diff --git a/pkg/epp/handlers/request.go b/pkg/epp/handlers/request.go index 26919356f..7f8122195 100644 --- a/pkg/epp/handlers/request.go +++ b/pkg/epp/handlers/request.go @@ -49,7 +49,7 @@ func (s *StreamingServer) HandleRequestHeaders(reqCtx *RequestContext, req *extP if len(pool.Spec.TargetPorts) != 1 { return fmt.Errorf("expected 1 target port, got %d", len(pool.Spec.TargetPorts)) } - reqCtx.TargetEndpoint = pod.Address + ":" + strconv.Itoa(int(pool.Spec.TargetPorts[0].PortNumber)) + reqCtx.TargetEndpoint = pod.Address + ":" + strconv.Itoa(int(pool.Spec.TargetPorts[0].Number)) reqCtx.RequestSize = 0 reqCtx.reqHeaderResp = s.generateRequestHeaderResponse(reqCtx) return nil diff --git a/pkg/epp/metrics/collectors/inference_pool_test.go b/pkg/epp/metrics/collectors/inference_pool_test.go index f1cd2f65f..6d4291b6d 100644 --- a/pkg/epp/metrics/collectors/inference_pool_test.go +++ b/pkg/epp/metrics/collectors/inference_pool_test.go @@ -80,7 +80,7 @@ func TestMetricsCollected(t *testing.T) { Name: "test-pool", }, Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + TargetPorts: []v1.Port{{Number: int32(8000)}}, }, } _ = ds.PoolSet(context.Background(), fakeClient, inferencePool) diff --git a/pkg/epp/requestcontrol/director.go b/pkg/epp/requestcontrol/director.go index 415b2d479..1435c0154 100644 --- a/pkg/epp/requestcontrol/director.go +++ b/pkg/epp/requestcontrol/director.go @@ -243,7 +243,7 @@ func (d *Director) prepareRequest(ctx context.Context, reqCtx *handlers.RequestC if len(pool.Spec.TargetPorts) != 1 { return reqCtx, errutil.Error{Code: errutil.BadRequest, Msg: "targetPorts should have length 1"} } - targetPort := int(pool.Spec.TargetPorts[0].PortNumber) + targetPort := int(pool.Spec.TargetPorts[0].Number) targetEndpoints := []string{} for _, pod := range result.ProfileResults[result.PrimaryProfileName].TargetPods { diff --git a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go index 4d17d90a2..0731c3a68 100644 --- a/pkg/epp/requestcontrol/director_test.go +++ b/pkg/epp/requestcontrol/director_test.go @@ -101,7 +101,7 @@ func TestDirector_HandleRequest(t *testing.T) { pool := &v1.InferencePool{ ObjectMeta: metav1.ObjectMeta{Name: "test-pool", Namespace: "default"}, Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{PortNumber: int32(8000)}}, + TargetPorts: []v1.Port{{Number: int32(8000)}}, Selector: map[v1.LabelKey]v1.LabelValue{ "app": "inference", TargetPortNumber: int32(8000), diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go index d145e2909..96d10147b 100644 --- a/pkg/epp/util/testing/wrappers.go +++ b/pkg/epp/util/testing/wrappers.go @@ -201,7 +201,7 @@ func (m *InferencePoolWrapper) Selector(selector map[string]string) *InferencePo } func (m *InferencePoolWrapper) TargetPorts(p int32) *InferencePoolWrapper { - m.Spec.TargetPorts = []v1.Port{{PortNumber: p}} + m.Spec.TargetPorts = []v1.Port{{Number: p}} return m } diff --git a/site-src/api-types/inferencepool.md b/site-src/api-types/inferencepool.md index caac3a1d4..c4481b1ad 100644 --- a/site-src/api-types/inferencepool.md +++ b/site-src/api-types/inferencepool.md @@ -34,7 +34,7 @@ metadata: name: vllm-llama3-8b-instruct spec: targetPorts: - - portNumber: 8000 + - number: 8000 selector: app: vllm-llama3-8b-instruct extensionRef: diff --git a/site-src/guides/implementers.md b/site-src/guides/implementers.md index b7358ce4f..6fce01657 100644 --- a/site-src/guides/implementers.md +++ b/site-src/guides/implementers.md @@ -48,7 +48,7 @@ metadata: name: vllm-llama3-8b-instruct spec: targetPorts: - - portNumber: 8000 + - number: 8000 selector: app: vllm-llama3-8b-instruct extensionRef: diff --git a/test/testdata/inferencepool-e2e.yaml b/test/testdata/inferencepool-e2e.yaml index 411b1fb25..7c546ba37 100644 --- a/test/testdata/inferencepool-e2e.yaml +++ b/test/testdata/inferencepool-e2e.yaml @@ -5,7 +5,7 @@ metadata: name: vllm-llama3-8b-instruct spec: targetPorts: - - portNumber: 8000 + - number: 8000 selector: matchLabels: app: vllm-llama3-8b-instruct diff --git a/test/testdata/inferencepool-with-model-hermetic.yaml b/test/testdata/inferencepool-with-model-hermetic.yaml index 1df1e06fc..20fea8d1a 100644 --- a/test/testdata/inferencepool-with-model-hermetic.yaml +++ b/test/testdata/inferencepool-with-model-hermetic.yaml @@ -5,7 +5,7 @@ metadata: namespace: default spec: targetPorts: - - portNumber: 8000 + - number: 8000 selector: matchLabels: app: vllm-llama3-8b-instruct-pool diff --git a/test/utils/server.go b/test/utils/server.go index c28749592..bcec8bca0 100644 --- a/test/utils/server.go +++ b/test/utils/server.go @@ -71,7 +71,7 @@ func PrepareForTestStreamingServer(objectives []*v1alpha2.InferenceObjective, po WithObjects(initObjs...). Build() pool := testutil.MakeInferencePool(poolName).Namespace(namespace).ObjRef() - pool.Spec.TargetPorts = []v1.Port{{PortNumber: poolPort}} + pool.Spec.TargetPorts = []v1.Port{{Number: poolPort}} _ = ds.PoolSet(context.Background(), fakeClient, pool) return ctx, cancel, ds, pmc From 5c3cdcdb2d08bce2c57d564faaa5167da4eda5c4 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 11 Aug 2025 16:42:51 -0700 Subject: [PATCH 09/38] added listtype tag --- api/v1/inferencepool_types.go | 4 +++- .../crd/bases/inference.networking.k8s.io_inferencepools.yaml | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index cac4195cb..7dc4705d2 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -67,6 +67,8 @@ type InferencePoolSpec struct { // +kubebuilder:validation:Required // +kubebuilder:validation:MinItems=1 // +kubebuilder:validation:MaxItems=1 + // +listType=set + // +listTypeKey=number TargetPorts []Port `json:"targetPorts"` // EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint @@ -75,7 +77,7 @@ type InferencePoolSpec struct { } type Port struct { - // PortNumber defines the port number to access the selected model server Pods. + // Number defines the port number to access the selected model server Pods. // The number must be in the range 1 to 65535. // // +kubebuilder:validation:Minimum=1 diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index 784302c9a..71eeae79a 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -138,7 +138,7 @@ spec: properties: number: description: |- - PortNumber defines the port number to access the selected model server Pods. + Number defines the port number to access the selected model server Pods. The number must be in the range 1 to 65535. format: int32 maximum: 65535 @@ -150,6 +150,7 @@ spec: maxItems: 1 minItems: 1 type: array + x-kubernetes-list-type: set required: - selector - targetPorts From 4262c0e4f9c4080950f4d33e9a8fde75897dcec5 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 11 Aug 2025 17:04:14 -0700 Subject: [PATCH 10/38] revert back: --- api/v1/inferencepool_types.go | 2 -- .../crd/bases/inference.networking.k8s.io_inferencepools.yaml | 1 - 2 files changed, 3 deletions(-) diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index 7dc4705d2..839c44e18 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -67,8 +67,6 @@ type InferencePoolSpec struct { // +kubebuilder:validation:Required // +kubebuilder:validation:MinItems=1 // +kubebuilder:validation:MaxItems=1 - // +listType=set - // +listTypeKey=number TargetPorts []Port `json:"targetPorts"` // EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index 71eeae79a..9d5062d6f 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -150,7 +150,6 @@ spec: maxItems: 1 minItems: 1 type: array - x-kubernetes-list-type: set required: - selector - targetPorts From 16b2c4385a89dc82b94ddd1159481cc44c699564 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Mon, 11 Aug 2025 17:08:55 -0700 Subject: [PATCH 11/38] updated to include list map --- api/v1/inferencepool_types.go | 2 ++ .../crd/bases/inference.networking.k8s.io_inferencepools.yaml | 3 +++ 2 files changed, 5 insertions(+) diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index 839c44e18..d4a9d06d9 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -67,6 +67,8 @@ type InferencePoolSpec struct { // +kubebuilder:validation:Required // +kubebuilder:validation:MinItems=1 // +kubebuilder:validation:MaxItems=1 + // +listType=map + // +listMapKey=number TargetPorts []Port `json:"targetPorts"` // EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index 9d5062d6f..7d061d1aa 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -150,6 +150,9 @@ spec: maxItems: 1 minItems: 1 type: array + x-kubernetes-list-map-keys: + - number + x-kubernetes-list-type: map required: - selector - targetPorts From 9f85e4feef1b02b7f08dae2c03dba44667f86235 Mon Sep 17 00:00:00 2001 From: capri-xiyue <52932582+capri-xiyue@users.noreply.github.com> Date: Tue, 12 Aug 2025 09:17:52 -0700 Subject: [PATCH 12/38] Update site-src/guides/inferencepool-rollout.md Co-authored-by: Aslak Knutsen --- site-src/guides/inferencepool-rollout.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site-src/guides/inferencepool-rollout.md b/site-src/guides/inferencepool-rollout.md index aad91d1ec..98b3cc4cc 100644 --- a/site-src/guides/inferencepool-rollout.md +++ b/site-src/guides/inferencepool-rollout.md @@ -209,7 +209,7 @@ metadata: name: vllm-llama3-8b-instruct-new spec: targetPorts: - - portNumber: 8000 + - number: 8000 selector: app: vllm-llama3-8b-instruct-new extensionRef: From 11fc5b38615869db23df2752e0f6e22730bcb3f0 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Tue, 12 Aug 2025 12:59:07 -0700 Subject: [PATCH 13/38] rebase --- api/v1/inferencepool_types.go | 14 +--- api/v1/zz_generated.deepcopy.go | 74 ++++--------------- .../api/v1/inferencepoolspec.go | 8 +- 3 files changed, 21 insertions(+), 75 deletions(-) diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index d4a9d06d9..eb12c7660 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -71,9 +71,10 @@ type InferencePoolSpec struct { // +listMapKey=number TargetPorts []Port `json:"targetPorts"` - // EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint - // picker service that picks endpoints for the requests routed to this pool. - EndpointPickerConfig `json:",inline"` + // Extension configures an endpoint picker as an extension service. + // + // +kubebuilder:validation:Required + ExtensionRef *Extension `json:"extensionRef,omitempty"` } type Port struct { @@ -86,13 +87,6 @@ type Port struct { Number int32 `json:"number"` } -// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint picker extension. -// This type is intended to be a union of mutually exclusive configuration options that we may add in the future. -type EndpointPickerConfig struct { - // Extension configures an endpoint picker as an extension service. - ExtensionRef *Extension `json:"extensionRef,omitempty"` -} - // Extension specifies how to configure an extension that runs the endpoint picker. type Extension struct { // Group is the group of the referent. diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 418b4b3da..4a95a0ed7 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -25,65 +25,8 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *EndpointPickerConfig) DeepCopyInto(out *EndpointPickerConfig) { - *out = *in - if in.ExtensionRef != nil { - in, out := &in.ExtensionRef, &out.ExtensionRef - *out = new(Extension) - (*in).DeepCopyInto(*out) - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EndpointPickerConfig. -func (in *EndpointPickerConfig) DeepCopy() *EndpointPickerConfig { - if in == nil { - return nil - } - out := new(EndpointPickerConfig) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Extension) DeepCopyInto(out *Extension) { - *out = *in - in.ExtensionReference.DeepCopyInto(&out.ExtensionReference) - in.ExtensionConnection.DeepCopyInto(&out.ExtensionConnection) -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Extension. -func (in *Extension) DeepCopy() *Extension { - if in == nil { - return nil - } - out := new(Extension) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ExtensionConnection) DeepCopyInto(out *ExtensionConnection) { - *out = *in - if in.FailureMode != nil { - in, out := &in.FailureMode, &out.FailureMode - *out = new(ExtensionFailureMode) - **out = **in - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionConnection. -func (in *ExtensionConnection) DeepCopy() *ExtensionConnection { - if in == nil { - return nil - } - out := new(ExtensionConnection) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ExtensionReference) DeepCopyInto(out *ExtensionReference) { *out = *in if in.Group != nil { in, out := &in.Group, &out.Group @@ -100,14 +43,19 @@ func (in *ExtensionReference) DeepCopyInto(out *ExtensionReference) { *out = new(PortNumber) **out = **in } + if in.FailureMode != nil { + in, out := &in.FailureMode, &out.FailureMode + *out = new(ExtensionFailureMode) + **out = **in + } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionReference. -func (in *ExtensionReference) DeepCopy() *ExtensionReference { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Extension. +func (in *Extension) DeepCopy() *Extension { if in == nil { return nil } - out := new(ExtensionReference) + out := new(Extension) in.DeepCopyInto(out) return out } @@ -186,7 +134,11 @@ func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { *out = make([]Port, len(*in)) copy(*out, *in) } - in.EndpointPickerConfig.DeepCopyInto(&out.EndpointPickerConfig) + if in.ExtensionRef != nil { + in, out := &in.ExtensionRef, &out.ExtensionRef + *out = new(Extension) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec. diff --git a/client-go/applyconfiguration/api/v1/inferencepoolspec.go b/client-go/applyconfiguration/api/v1/inferencepoolspec.go index a1c0f8402..c116ad89e 100644 --- a/client-go/applyconfiguration/api/v1/inferencepoolspec.go +++ b/client-go/applyconfiguration/api/v1/inferencepoolspec.go @@ -25,9 +25,9 @@ import ( // InferencePoolSpecApplyConfiguration represents a declarative configuration of the InferencePoolSpec type for use // with apply. type InferencePoolSpecApplyConfiguration struct { - Selector map[apiv1.LabelKey]apiv1.LabelValue `json:"selector,omitempty"` - TargetPorts []PortApplyConfiguration `json:"targetPorts,omitempty"` - EndpointPickerConfigApplyConfiguration `json:",inline"` + Selector map[apiv1.LabelKey]apiv1.LabelValue `json:"selector,omitempty"` + TargetPorts []PortApplyConfiguration `json:"targetPorts,omitempty"` + ExtensionRef *ExtensionApplyConfiguration `json:"extensionRef,omitempty"` } // InferencePoolSpecApplyConfiguration constructs a declarative configuration of the InferencePoolSpec type for use with @@ -67,6 +67,6 @@ func (b *InferencePoolSpecApplyConfiguration) WithTargetPorts(values ...*PortApp // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the ExtensionRef field is set to the value of the last call. func (b *InferencePoolSpecApplyConfiguration) WithExtensionRef(value *ExtensionApplyConfiguration) *InferencePoolSpecApplyConfiguration { - b.EndpointPickerConfigApplyConfiguration.ExtensionRef = value + b.ExtensionRef = value return b } From 02212a4f8643f8857fd1e4ef1530125b22c3db37 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Wed, 13 Aug 2025 10:49:03 -0700 Subject: [PATCH 14/38] partially fix conflicts --- api/v1/zz_generated.deepcopy.go | 30 ++++++++++++++----- .../api/v1/inferencepoolspec.go | 26 +++++----------- pkg/epp/controller/pod_reconciler_test.go | 21 ------------- pkg/epp/requestcontrol/director_test.go | 3 -- 4 files changed, 31 insertions(+), 49 deletions(-) diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 4a95a0ed7..977cac82a 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -122,13 +122,7 @@ func (in *InferencePoolList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { *out = *in - if in.Selector != nil { - in, out := &in.Selector, &out.Selector - *out = make(map[LabelKey]LabelValue, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } + in.Selector.DeepCopyInto(&out.Selector) if in.TargetPorts != nil { in, out := &in.TargetPorts, &out.TargetPorts *out = make([]Port, len(*in)) @@ -173,6 +167,28 @@ func (in *InferencePoolStatus) DeepCopy() *InferencePoolStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LabelSelector) DeepCopyInto(out *LabelSelector) { + *out = *in + if in.MatchLabels != nil { + in, out := &in.MatchLabels, &out.MatchLabels + *out = make(map[LabelKey]LabelValue, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LabelSelector. +func (in *LabelSelector) DeepCopy() *LabelSelector { + if in == nil { + return nil + } + out := new(LabelSelector) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ParentGatewayReference) DeepCopyInto(out *ParentGatewayReference) { *out = *in diff --git a/client-go/applyconfiguration/api/v1/inferencepoolspec.go b/client-go/applyconfiguration/api/v1/inferencepoolspec.go index c116ad89e..364d059dd 100644 --- a/client-go/applyconfiguration/api/v1/inferencepoolspec.go +++ b/client-go/applyconfiguration/api/v1/inferencepoolspec.go @@ -18,16 +18,12 @@ limitations under the License. package v1 -import ( - apiv1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" -) - // InferencePoolSpecApplyConfiguration represents a declarative configuration of the InferencePoolSpec type for use // with apply. type InferencePoolSpecApplyConfiguration struct { - Selector map[apiv1.LabelKey]apiv1.LabelValue `json:"selector,omitempty"` - TargetPorts []PortApplyConfiguration `json:"targetPorts,omitempty"` - ExtensionRef *ExtensionApplyConfiguration `json:"extensionRef,omitempty"` + Selector *LabelSelectorApplyConfiguration `json:"selector,omitempty"` + TargetPorts []PortApplyConfiguration `json:"targetPorts,omitempty"` + ExtensionRef *ExtensionApplyConfiguration `json:"extensionRef,omitempty"` } // InferencePoolSpecApplyConfiguration constructs a declarative configuration of the InferencePoolSpec type for use with @@ -36,17 +32,11 @@ func InferencePoolSpec() *InferencePoolSpecApplyConfiguration { return &InferencePoolSpecApplyConfiguration{} } -// WithSelector puts the entries into the Selector field in the declarative configuration -// and returns the receiver, so that objects can be build by chaining "With" function invocations. -// If called multiple times, the entries provided by each call will be put on the Selector field, -// overwriting an existing map entries in Selector field with the same key. -func (b *InferencePoolSpecApplyConfiguration) WithSelector(entries map[apiv1.LabelKey]apiv1.LabelValue) *InferencePoolSpecApplyConfiguration { - if b.Selector == nil && len(entries) > 0 { - b.Selector = make(map[apiv1.LabelKey]apiv1.LabelValue, len(entries)) - } - for k, v := range entries { - b.Selector[k] = v - } +// WithSelector sets the Selector field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Selector field is set to the value of the last call. +func (b *InferencePoolSpecApplyConfiguration) WithSelector(value *LabelSelectorApplyConfiguration) *InferencePoolSpecApplyConfiguration { + b.Selector = value return b } diff --git a/pkg/epp/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go index 7c3a2f3d6..518dae135 100644 --- a/pkg/epp/controller/pod_reconciler_test.go +++ b/pkg/epp/controller/pod_reconciler_test.go @@ -62,9 +62,6 @@ func TestPodReconciler(t *testing.T) { pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ TargetPorts: []v1.Port{{Number: int32(8000)}}, - Selector: map[v1.LabelKey]v1.LabelValue{ - "some-key": "some-val", - TargetPortNumber: int32(8000), Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -83,9 +80,6 @@ func TestPodReconciler(t *testing.T) { pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ TargetPorts: []v1.Port{{Number: int32(8000)}}, - Selector: map[v1.LabelKey]v1.LabelValue{ - "some-key": "some-val", - TargetPortNumber: int32(8000), Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -104,9 +98,6 @@ func TestPodReconciler(t *testing.T) { pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ TargetPorts: []v1.Port{{Number: int32(8000)}}, - Selector: map[v1.LabelKey]v1.LabelValue{ - "some-key": "some-val", - TargetPortNumber: int32(8000), Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -126,9 +117,6 @@ func TestPodReconciler(t *testing.T) { pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ TargetPorts: []v1.Port{{Number: int32(8000)}}, - Selector: map[v1.LabelKey]v1.LabelValue{ - "some-key": "some-val", - TargetPortNumber: int32(8000), Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -145,9 +133,6 @@ func TestPodReconciler(t *testing.T) { pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ TargetPorts: []v1.Port{{Number: int32(8000)}}, - Selector: map[v1.LabelKey]v1.LabelValue{ - "some-key": "some-val", - TargetPortNumber: int32(8000), Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -165,9 +150,6 @@ func TestPodReconciler(t *testing.T) { pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ TargetPorts: []v1.Port{{Number: int32(8000)}}, - Selector: map[v1.LabelKey]v1.LabelValue{ - "some-key": "some-val", - TargetPortNumber: int32(8000), Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -186,9 +168,6 @@ func TestPodReconciler(t *testing.T) { pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ TargetPorts: []v1.Port{{Number: int32(8000)}}, - Selector: map[v1.LabelKey]v1.LabelValue{ - "some-key": "some-val", - TargetPortNumber: int32(8000), Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", diff --git a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go index 0731c3a68..6919ae45c 100644 --- a/pkg/epp/requestcontrol/director_test.go +++ b/pkg/epp/requestcontrol/director_test.go @@ -102,9 +102,6 @@ func TestDirector_HandleRequest(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: "test-pool", Namespace: "default"}, Spec: v1.InferencePoolSpec{ TargetPorts: []v1.Port{{Number: int32(8000)}}, - Selector: map[v1.LabelKey]v1.LabelValue{ - "app": "inference", - TargetPortNumber: int32(8000), Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "app": "inference", From ca973983b1c5d834e00a89dad349d93f8240a322 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Wed, 13 Aug 2025 13:58:31 -0700 Subject: [PATCH 15/38] rebase with conversion change --- apix/v1alpha2/inferencepool_conversion.go | 4 ++-- .../v1alpha2/inferencepool_conversion_test.go | 4 ++-- .../inferencepool_reconciler_test.go | 20 +++++++++---------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index 77e6ffa4a..33992aa2e 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -41,7 +41,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error { } dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta - dst.Spec.TargetPortNumber = src.Spec.TargetPortNumber + dst.Spec.TargetPorts = []v1.Port{{Number: src.Spec.TargetPortNumber}} dst.Spec.ExtensionRef = v1Extension dst.Status = *v1Status if src.Spec.Selector != nil { @@ -68,7 +68,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { } dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta - dst.Spec.TargetPortNumber = src.Spec.TargetPortNumber + dst.Spec.TargetPortNumber = src.Spec.TargetPorts[0].Number dst.Spec.ExtensionRef = extensionRef dst.Status = *status if src.Spec.Selector.MatchLabels != nil { diff --git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go index f395f99f1..cea9b3626 100644 --- a/apix/v1alpha2/inferencepool_conversion_test.go +++ b/apix/v1alpha2/inferencepool_conversion_test.go @@ -99,7 +99,7 @@ func TestInferencePoolConvertTo(t *testing.T) { "app": "my-model-server", }, }, - TargetPortNumber: 8080, + TargetPorts: []v1.Port{{Number: int32(8080)}}, ExtensionRef: &v1.Extension{ Group: &v1Group, Kind: &v1Kind, @@ -234,7 +234,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, }, - TargetPortNumber: 8080, + TargetPorts: []v1.Port{{Number: int32(8080)}}, ExtensionRef: &v1.Extension{ Group: &v1Group, Kind: &v1Kind, diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index 2c66a3c26..811e4d59e 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -80,9 +80,8 @@ func TestInferencePoolReconciler(t *testing.T) { pool1 := utiltest.MakeInferencePool("pool1"). Namespace("pool1-ns"). Selector(selector_v1). - TargetPorts(8080).ObjRef() - ExtensionRef("epp-service"). - TargetPortNumber(8080).ObjRef() + TargetPorts(8080). + ExtensionRef("epp-service").ObjRef() pool1.SetGroupVersionKind(gvk) pool2 := utiltest.MakeInferencePool("pool2").Namespace("pool2-ns").ExtensionRef("epp-service").ObjRef() pool2.SetGroupVersionKind(gvk) @@ -220,15 +219,15 @@ func TestXInferencePoolReconciler(t *testing.T) { Version: v1alpha2.GroupVersion.Version, Kind: "InferencePool", } - // TODO: change it to 8080 pool1 := utiltest.MakeXInferencePool("pool1"). Namespace("pool1-ns"). Selector(selector_v1). - TargetPortNumber(0).ObjRef() - pool2 := utiltest.MakeXInferencePool("pool2").Namespace("pool2-ns").ObjRef() ExtensionRef("epp-service"). TargetPortNumber(8080).ObjRef() - pool2 := utiltest.MakeXInferencePool("pool2").Namespace("pool2-ns").ExtensionRef("epp-service").ObjRef() + pool2 := utiltest.MakeXInferencePool("pool2"). + Namespace("pool2-ns"). + ExtensionRef("epp-service"). + TargetPortNumber(8080).ObjRef() pool1.SetGroupVersionKind(gvk) pool2.SetGroupVersionKind(gvk) @@ -290,8 +289,7 @@ func TestXInferencePoolReconciler(t *testing.T) { if err := fakeClient.Get(ctx, req.NamespacedName, newPool1); err != nil { t.Errorf("Unexpected pool get error: %v", err) } - // TODO: change it later to 9090 - newPool1.Spec.TargetPortNumber = 0 + newPool1.Spec.TargetPortNumber = 9090 if err := fakeClient.Update(ctx, newPool1, &client.UpdateOptions{}); err != nil { t.Errorf("Unexpected pool update error: %v", err) } @@ -329,7 +327,9 @@ func xDiffStore(t *testing.T, datastore datastore.Datastore, params xDiffStorePa return "" } - gotXPool, err := v1alpha2.ConvertFrom(gotPool) + gotXPool := &v1alpha2.InferencePool{} + + err := gotXPool.ConvertFrom(gotPool) if err != nil { t.Fatalf("failed to convert unstructured to InferencePool: %v", err) } From 93ab2980ae84ba6e9e22f47f0bbc315eae021fb1 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Wed, 13 Aug 2025 14:16:14 -0700 Subject: [PATCH 16/38] updated generate --- config/crd/bases/inference.networking.k8s.io_inferencepools.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index 7d061d1aa..5fbaad98b 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -154,6 +154,7 @@ spec: - number x-kubernetes-list-type: map required: + - extensionRef - selector - targetPorts type: object From 14a74b6b2677b0b82f6cb2023811cba1aa98e617 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Wed, 13 Aug 2025 14:26:31 -0700 Subject: [PATCH 17/38] added ut --- apix/v1alpha2/inferencepool_conversion_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go index cea9b3626..ce12c625c 100644 --- a/apix/v1alpha2/inferencepool_conversion_test.go +++ b/apix/v1alpha2/inferencepool_conversion_test.go @@ -174,7 +174,7 @@ func TestInferencePoolConvertTo(t *testing.T) { "app": "my-model-server", }, }, - TargetPortNumber: 8080, + TargetPorts: []v1.Port{{Number: int32(8080)}}, }, Status: v1.InferencePoolStatus{ Parents: []v1.PoolStatus{ @@ -316,7 +316,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, }, - TargetPortNumber: 8080, + TargetPorts: []v1.Port{{Number: int32(8080)}}, }, Status: v1.InferencePoolStatus{ Parents: []v1.PoolStatus{ From 1f9ff74ffe22a5e5816871f2ddd23974e93ba3b6 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 08:51:05 -0700 Subject: [PATCH 18/38] rebase main --- api/v1/inferencepool_types.go | 2 -- .../crd/bases/inference.networking.k8s.io_inferencepools.yaml | 1 - 2 files changed, 3 deletions(-) diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index eb12c7660..f71418510 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -72,8 +72,6 @@ type InferencePoolSpec struct { TargetPorts []Port `json:"targetPorts"` // Extension configures an endpoint picker as an extension service. - // - // +kubebuilder:validation:Required ExtensionRef *Extension `json:"extensionRef,omitempty"` } diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index 5fbaad98b..7d061d1aa 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -154,7 +154,6 @@ spec: - number x-kubernetes-list-type: map required: - - extensionRef - selector - targetPorts type: object From 753af72a1fbaa5a6af8ddbaf5c041827ba420fea Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 08:58:15 -0700 Subject: [PATCH 19/38] fixed merge error --- conformance/resources/base.yaml | 10 ++++------ pkg/epp/controller/inferencepool_reconciler_test.go | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/conformance/resources/base.yaml b/conformance/resources/base.yaml index 015e89557..5e00a6971 100644 --- a/conformance/resources/base.yaml +++ b/conformance/resources/base.yaml @@ -153,12 +153,9 @@ metadata: namespace: gateway-conformance-app-backend spec: selector: - app: primary-inference-model-server - targetPorts: - - portNumber: 3000 matchLabels: app: primary-inference-model-server - targetPortNumber: 3000 + targetPorts: - number: 3000 extensionRef: name: primary-endpoint-picker-svc @@ -251,9 +248,10 @@ metadata: namespace: gateway-conformance-app-backend spec: selector: - app: secondary-inference-model-server + matchLabels: + app: primary-inference-model-server targetPorts: - - portNumber: 3000 + - number: 3000 extensionRef: name: secondary-endpoint-picker-svc failureMode: FailOpen diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index 811e4d59e..363ec9014 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -331,7 +331,7 @@ func xDiffStore(t *testing.T, datastore datastore.Datastore, params xDiffStorePa err := gotXPool.ConvertFrom(gotPool) if err != nil { - t.Fatalf("failed to convert unstructured to InferencePool: %v", err) + t.Fatalf("failed to convert InferencePool to XInferencePool: %v", err) } if diff := cmp.Diff(params.wantPool, gotXPool); diff != "" { return "pool:" + diff From cce86b8cccd01977ce6f961148fa870fa0c49554 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 09:55:58 -0700 Subject: [PATCH 20/38] updated helm chart --- cmd/epp/runner/runner.go | 2 +- config/charts/inferencepool/templates/inferencepool.yaml | 4 +++- config/charts/inferencepool/values.yaml | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index 29be0bc80..316527d43 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -148,7 +148,7 @@ var ( "The configuration specified as text, in lieu of a file") modelServerMetricsPort = flag.Int("model-server-metrics-port", 0, "Port to scrape metrics from pods. "+ - "Default value will be set to InferencePool.Spec.TargetPorts if not set.") + "Default value will be set to the first entry of InferencePool.Spec.TargetPorts if not set.") modelServerMetricsPath = flag.String("model-server-metrics-path", "/metrics", "Path to scrape metrics from pods") modelServerMetricsScheme = flag.String("model-server-metrics-scheme", "http", "Scheme to scrape metrics from pods") modelServerMetricsHttpsInsecureSkipVerify = flag.Bool("model-server-metrics-https-insecure-skip-verify", true, "When using 'https' scheme for 'model-server-metrics-scheme', configure 'InsecureSkipVerify' (default to true)") diff --git a/config/charts/inferencepool/templates/inferencepool.yaml b/config/charts/inferencepool/templates/inferencepool.yaml index fbce19cf0..ad13f2d43 100644 --- a/config/charts/inferencepool/templates/inferencepool.yaml +++ b/config/charts/inferencepool/templates/inferencepool.yaml @@ -8,7 +8,9 @@ metadata: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} spec: targetPorts: - - number: {{ .Values.inferencePool.targetPortNumber }} + {{- range .Values.inferencePool.targetPorts }} + - number: {{ .number }} + {{- end }} selector: matchLabels: {{- if .Values.inferencePool.modelServers.matchLabels }} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 95eb07731..6246282b9 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -39,7 +39,8 @@ inferenceExtension: enableLeaderElection: false inferencePool: - targetPortNumber: 8000 + targetPorts: + - number: 8000 modelServerType: vllm # vllm, triton-tensorrt-llm # modelServers: # REQUIRED # matchLabels: From 119d44504dedee1d035770c810448c3d69972ea6 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 09:59:26 -0700 Subject: [PATCH 21/38] updated message --- cmd/epp/runner/runner.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index 316527d43..6b14d53be 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -148,7 +148,7 @@ var ( "The configuration specified as text, in lieu of a file") modelServerMetricsPort = flag.Int("model-server-metrics-port", 0, "Port to scrape metrics from pods. "+ - "Default value will be set to the first entry of InferencePool.Spec.TargetPorts if not set.") + "Default value will be set to the InferencePool.Spec.TargetPorts[0].Number if not set.") modelServerMetricsPath = flag.String("model-server-metrics-path", "/metrics", "Path to scrape metrics from pods") modelServerMetricsScheme = flag.String("model-server-metrics-scheme", "http", "Scheme to scrape metrics from pods") modelServerMetricsHttpsInsecureSkipVerify = flag.Bool("model-server-metrics-https-insecure-skip-verify", true, "When using 'https' scheme for 'model-server-metrics-scheme', configure 'InsecureSkipVerify' (default to true)") From e0c297c6ea3d47c50e32f00e0c2d9b2a0cc938a1 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 10:38:30 -0700 Subject: [PATCH 22/38] updated to use alias --- api/v1/inferencepool_types.go | 2 +- apix/v1alpha2/inferencepool_conversion.go | 4 ++-- client-go/applyconfiguration/api/v1/port.go | 8 ++++++-- pkg/epp/backend/metrics/pod_metrics.go | 2 +- pkg/epp/controller/pod_reconciler_test.go | 14 +++++++------- pkg/epp/datastore/datastore_test.go | 2 +- pkg/epp/metrics/collectors/inference_pool_test.go | 2 +- pkg/epp/requestcontrol/director_test.go | 2 +- pkg/epp/util/testing/wrappers.go | 2 +- test/utils/server.go | 2 +- 10 files changed, 22 insertions(+), 18 deletions(-) diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index f71418510..9b6b04288 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -82,7 +82,7 @@ type Port struct { // +kubebuilder:validation:Minimum=1 // +kubebuilder:validation:Maximum=65535 // +kubebuilder:validation:Required - Number int32 `json:"number"` + Number PortNumber `json:"number"` } // Extension specifies how to configure an extension that runs the endpoint picker. diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index 33992aa2e..94a887ea7 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -41,7 +41,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error { } dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta - dst.Spec.TargetPorts = []v1.Port{{Number: src.Spec.TargetPortNumber}} + dst.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(src.Spec.TargetPortNumber)}} dst.Spec.ExtensionRef = v1Extension dst.Status = *v1Status if src.Spec.Selector != nil { @@ -68,7 +68,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { } dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta - dst.Spec.TargetPortNumber = src.Spec.TargetPorts[0].Number + dst.Spec.TargetPortNumber = int32(src.Spec.TargetPorts[0].Number) dst.Spec.ExtensionRef = extensionRef dst.Status = *status if src.Spec.Selector.MatchLabels != nil { diff --git a/client-go/applyconfiguration/api/v1/port.go b/client-go/applyconfiguration/api/v1/port.go index 266244d0c..6067a5d38 100644 --- a/client-go/applyconfiguration/api/v1/port.go +++ b/client-go/applyconfiguration/api/v1/port.go @@ -18,10 +18,14 @@ limitations under the License. package v1 +import ( + apiv1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" +) + // PortApplyConfiguration represents a declarative configuration of the Port type for use // with apply. type PortApplyConfiguration struct { - Number *int32 `json:"number,omitempty"` + Number *apiv1.PortNumber `json:"number,omitempty"` } // PortApplyConfiguration constructs a declarative configuration of the Port type for use with @@ -33,7 +37,7 @@ func Port() *PortApplyConfiguration { // WithNumber sets the Number field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. // If called multiple times, the Number field is set to the value of the last call. -func (b *PortApplyConfiguration) WithNumber(value int32) *PortApplyConfiguration { +func (b *PortApplyConfiguration) WithNumber(value apiv1.PortNumber) *PortApplyConfiguration { b.Number = &value return b } diff --git a/pkg/epp/backend/metrics/pod_metrics.go b/pkg/epp/backend/metrics/pod_metrics.go index 8a1a72b9f..da66a97ed 100644 --- a/pkg/epp/backend/metrics/pod_metrics.go +++ b/pkg/epp/backend/metrics/pod_metrics.go @@ -120,7 +120,7 @@ func (pm *podMetrics) refreshMetrics() error { if len(pool.Spec.TargetPorts) != 1 { return fmt.Errorf("expected 1 target port, got %d", len(pool.Spec.TargetPorts)) } - updated, err := pm.pmc.FetchMetrics(ctx, pm.GetPod(), pm.GetMetrics(), pool.Spec.TargetPorts[0].Number) + updated, err := pm.pmc.FetchMetrics(ctx, pm.GetPod(), pm.GetMetrics(), int32(pool.Spec.TargetPorts[0].Number)) if err != nil { pm.logger.V(logutil.TRACE).Info("Failed to refreshed metrics:", "err", err) } diff --git a/pkg/epp/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go index 518dae135..5ceb3efdb 100644 --- a/pkg/epp/controller/pod_reconciler_test.go +++ b/pkg/epp/controller/pod_reconciler_test.go @@ -61,7 +61,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{Number: int32(8000)}}, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -79,7 +79,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{Number: int32(8000)}}, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -97,7 +97,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{Number: int32(8000)}}, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -116,7 +116,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{Number: int32(8000)}}, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -132,7 +132,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{Number: int32(8000)}}, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -149,7 +149,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{Number: int32(8000)}}, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -167,7 +167,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{Number: int32(8000)}}, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", diff --git a/pkg/epp/datastore/datastore_test.go b/pkg/epp/datastore/datastore_test.go index dc60458b6..271c31ee7 100644 --- a/pkg/epp/datastore/datastore_test.go +++ b/pkg/epp/datastore/datastore_test.go @@ -245,7 +245,7 @@ var ( pod2NamespacedName = types.NamespacedName{Name: pod2.Name, Namespace: pod2.Namespace} inferencePool = &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{Number: int32(8000)}}, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, }, } ) diff --git a/pkg/epp/metrics/collectors/inference_pool_test.go b/pkg/epp/metrics/collectors/inference_pool_test.go index 6d4291b6d..dcac3b37d 100644 --- a/pkg/epp/metrics/collectors/inference_pool_test.go +++ b/pkg/epp/metrics/collectors/inference_pool_test.go @@ -80,7 +80,7 @@ func TestMetricsCollected(t *testing.T) { Name: "test-pool", }, Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{Number: int32(8000)}}, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, }, } _ = ds.PoolSet(context.Background(), fakeClient, inferencePool) diff --git a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go index 6919ae45c..e7968d4d9 100644 --- a/pkg/epp/requestcontrol/director_test.go +++ b/pkg/epp/requestcontrol/director_test.go @@ -101,7 +101,7 @@ func TestDirector_HandleRequest(t *testing.T) { pool := &v1.InferencePool{ ObjectMeta: metav1.ObjectMeta{Name: "test-pool", Namespace: "default"}, Spec: v1.InferencePoolSpec{ - TargetPorts: []v1.Port{{Number: int32(8000)}}, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "app": "inference", diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go index 96d10147b..a12fed536 100644 --- a/pkg/epp/util/testing/wrappers.go +++ b/pkg/epp/util/testing/wrappers.go @@ -201,7 +201,7 @@ func (m *InferencePoolWrapper) Selector(selector map[string]string) *InferencePo } func (m *InferencePoolWrapper) TargetPorts(p int32) *InferencePoolWrapper { - m.Spec.TargetPorts = []v1.Port{{Number: p}} + m.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(p)}} return m } diff --git a/test/utils/server.go b/test/utils/server.go index bcec8bca0..51eb33fa0 100644 --- a/test/utils/server.go +++ b/test/utils/server.go @@ -71,7 +71,7 @@ func PrepareForTestStreamingServer(objectives []*v1alpha2.InferenceObjective, po WithObjects(initObjs...). Build() pool := testutil.MakeInferencePool(poolName).Namespace(namespace).ObjRef() - pool.Spec.TargetPorts = []v1.Port{{Number: poolPort}} + pool.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(poolPort)}} _ = ds.PoolSet(context.Background(), fakeClient, pool) return ctx, cancel, ds, pmc From 4ccccfe3e043c03a842289574dfd415330166fc8 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 10:47:32 -0700 Subject: [PATCH 23/38] fixed ut --- apix/v1alpha2/inferencepool_conversion_test.go | 8 ++++---- config/charts/inferencepool/values.yaml | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go index ce12c625c..6b1190130 100644 --- a/apix/v1alpha2/inferencepool_conversion_test.go +++ b/apix/v1alpha2/inferencepool_conversion_test.go @@ -99,7 +99,7 @@ func TestInferencePoolConvertTo(t *testing.T) { "app": "my-model-server", }, }, - TargetPorts: []v1.Port{{Number: int32(8080)}}, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, ExtensionRef: &v1.Extension{ Group: &v1Group, Kind: &v1Kind, @@ -174,7 +174,7 @@ func TestInferencePoolConvertTo(t *testing.T) { "app": "my-model-server", }, }, - TargetPorts: []v1.Port{{Number: int32(8080)}}, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, }, Status: v1.InferencePoolStatus{ Parents: []v1.PoolStatus{ @@ -234,7 +234,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, }, - TargetPorts: []v1.Port{{Number: int32(8080)}}, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, ExtensionRef: &v1.Extension{ Group: &v1Group, Kind: &v1Kind, @@ -316,7 +316,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, }, - TargetPorts: []v1.Port{{Number: int32(8080)}}, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, }, Status: v1.InferencePoolStatus{ Parents: []v1.PoolStatus{ diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 6246282b9..5f0c2fd04 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -42,9 +42,9 @@ inferencePool: targetPorts: - number: 8000 modelServerType: vllm # vllm, triton-tensorrt-llm - # modelServers: # REQUIRED - # matchLabels: - # app: vllm-llama3-8b-instruct + modelServers: # REQUIRED + matchLabels: + app: vllm-llama3-8b-instruct provider: name: none From 357b96f7db9c06a0dca0e39ece56d439d413287f Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 13:39:20 -0700 Subject: [PATCH 24/38] merge conflicts --- api/v1/inferencepool_types.go | 22 +++++++++++----- api/v1/zz_generated.deepcopy.go | 26 ++++++++++++++++++- apix/v1alpha2/inferencepool_conversion.go | 12 ++++----- .../v1alpha2/inferencepool_conversion_test.go | 13 +++++----- ...ence.networking.k8s.io_inferencepools.yaml | 2 -- pkg/epp/util/testing/wrappers.go | 2 +- 6 files changed, 53 insertions(+), 24 deletions(-) diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index b1ce91e56..aaf0f51ab 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -66,17 +66,25 @@ type InferencePoolSpec struct { // +required Selector LabelSelector `json:"selector,omitempty,omitzero"` - // TargetPortNumber defines the port number to access the selected model server Pods. + // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:MaxItems=1 + // +listType=map + // +listMapKey=number + TargetPorts []Port `json:"targetPorts"` + + // Extension configures an endpoint picker as an extension service. + // +optional + ExtensionRef *Extension `json:"extensionRef,omitempty,omitzero"` +} + +type Port struct { + // Number defines the port number to access the selected model server Pods. // The number must be in the range 1 to 65535. // // +kubebuilder:validation:Minimum=1 // +kubebuilder:validation:Maximum=65535 - // +required - TargetPortNumber int32 `json:"targetPortNumber,omitempty"` - - // Extension configures an endpoint picker as an extension service. - // +optional - ExtensionRef Extension `json:"extensionRef,omitempty,omitzero"` + // +kubebuilder:validation:Required + Number PortNumber `json:"number"` } // Extension specifies how to configure an extension that runs the endpoint picker. diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 11fa77fdb..b3af93be4 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -108,7 +108,16 @@ func (in *InferencePoolList) DeepCopyObject() runtime.Object { func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { *out = *in in.Selector.DeepCopyInto(&out.Selector) - in.ExtensionRef.DeepCopyInto(&out.ExtensionRef) + if in.TargetPorts != nil { + in, out := &in.TargetPorts, &out.TargetPorts + *out = make([]Port, len(*in)) + copy(*out, *in) + } + if in.ExtensionRef != nil { + in, out := &in.ExtensionRef, &out.ExtensionRef + *out = new(Extension) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec. @@ -207,3 +216,18 @@ func (in *PoolStatus) DeepCopy() *PoolStatus { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Port) DeepCopyInto(out *Port) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Port. +func (in *Port) DeepCopy() *Port { + if in == nil { + return nil + } + out := new(Port) + in.DeepCopyInto(out) + return out +} diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index cbb2c3954..332c95a1c 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -58,7 +58,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { if src == nil { return errors.New("src cannot be nil") } - extensionRef, err := convertExtensionRefFromV1(&src.Spec.ExtensionRef) + extensionRef, err := convertExtensionRefFromV1(src.Spec.ExtensionRef) if err != nil { return err } @@ -102,19 +102,19 @@ func convertStatusFromV1(src *v1.InferencePoolStatus) (*InferencePoolStatus, err return convert[InferencePoolStatus](u) } -func convertExtensionRefToV1(src *Extension) (v1.Extension, error) { +func convertExtensionRefToV1(src *Extension) (*v1.Extension, error) { if src == nil { - return v1.Extension{}, nil + return nil, nil } u, err := toUnstructured(src) if err != nil { - return v1.Extension{}, err + return nil, err } out, err := convert[v1.Extension](u) if err != nil { - return v1.Extension{}, err + return nil, err } - return *out, nil + return out, nil } func convertExtensionRefFromV1(src *v1.Extension) (*Extension, error) { diff --git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go index 5c79861c4..2856610c7 100644 --- a/apix/v1alpha2/inferencepool_conversion_test.go +++ b/apix/v1alpha2/inferencepool_conversion_test.go @@ -99,8 +99,8 @@ func TestInferencePoolConvertTo(t *testing.T) { "app": "my-model-server", }, }, - TargetPortNumber: 8080, - ExtensionRef: v1.Extension{ + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, + ExtensionRef: &v1.Extension{ Group: &v1Group, Kind: v1Kind, Name: "my-epp-service", @@ -174,7 +174,7 @@ func TestInferencePoolConvertTo(t *testing.T) { "app": "my-model-server", }, }, - TargetPortNumber: 8080, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, }, Status: v1.InferencePoolStatus{ Parents: []v1.PoolStatus{ @@ -234,8 +234,8 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, }, - TargetPortNumber: 8080, - ExtensionRef: v1.Extension{ + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, + ExtensionRef: &v1.Extension{ Group: &v1Group, Kind: v1Kind, Name: "my-epp-service", @@ -316,7 +316,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, }, - TargetPortNumber: 8080, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, }, Status: v1.InferencePoolStatus{ Parents: []v1.PoolStatus{ @@ -348,7 +348,6 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, TargetPortNumber: 8080, - ExtensionRef: &Extension{}, }, Status: InferencePoolStatus{ Parents: []PoolStatus{ diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index 30ca75e05..ed2b16f8a 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -133,8 +133,6 @@ spec: - matchLabels type: object targetPorts: - description: TargetPorts defines the ports to access the selected - model server Pods. items: properties: number: diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go index a47683d47..a12fed536 100644 --- a/pkg/epp/util/testing/wrappers.go +++ b/pkg/epp/util/testing/wrappers.go @@ -206,7 +206,7 @@ func (m *InferencePoolWrapper) TargetPorts(p int32) *InferencePoolWrapper { } func (m *InferencePoolWrapper) ExtensionRef(name string) *InferencePoolWrapper { - m.Spec.ExtensionRef = v1.Extension{Name: v1.ObjectName(name)} + m.Spec.ExtensionRef = &v1.Extension{Name: v1.ObjectName(name)} return m } From 2d4a599ffae209fff6ee4e73d5c8b31836ca604d Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 13:52:42 -0700 Subject: [PATCH 25/38] fixed kubectl cli linter --- api/v1/inferencepool_types.go | 5 ++--- api/v1/zz_generated.deepcopy.go | 6 +----- apix/v1alpha2/inferencepool_conversion.go | 4 ++-- apix/v1alpha2/inferencepool_conversion_test.go | 4 ++-- pkg/epp/util/testing/wrappers.go | 2 +- 5 files changed, 8 insertions(+), 13 deletions(-) diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index aaf0f51ab..b957af2b7 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -70,19 +70,18 @@ type InferencePoolSpec struct { // +kubebuilder:validation:MaxItems=1 // +listType=map // +listMapKey=number + // +required TargetPorts []Port `json:"targetPorts"` // Extension configures an endpoint picker as an extension service. // +optional - ExtensionRef *Extension `json:"extensionRef,omitempty,omitzero"` + ExtensionRef Extension `json:"extensionRef,omitempty,omitzero"` } type Port struct { // Number defines the port number to access the selected model server Pods. // The number must be in the range 1 to 65535. // - // +kubebuilder:validation:Minimum=1 - // +kubebuilder:validation:Maximum=65535 // +kubebuilder:validation:Required Number PortNumber `json:"number"` } diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index b3af93be4..0cdf1700a 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -113,11 +113,7 @@ func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { *out = make([]Port, len(*in)) copy(*out, *in) } - if in.ExtensionRef != nil { - in, out := &in.ExtensionRef, &out.ExtensionRef - *out = new(Extension) - (*in).DeepCopyInto(*out) - } + in.ExtensionRef.DeepCopyInto(&out.ExtensionRef) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec. diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index 332c95a1c..08435c964 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -42,7 +42,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error { dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta dst.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(src.Spec.TargetPortNumber)}} - dst.Spec.ExtensionRef = v1Extension + dst.Spec.ExtensionRef = *v1Extension dst.Status = *v1Status if src.Spec.Selector != nil { dst.Spec.Selector.MatchLabels = make(map[v1.LabelKey]v1.LabelValue, len(src.Spec.Selector)) @@ -58,7 +58,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { if src == nil { return errors.New("src cannot be nil") } - extensionRef, err := convertExtensionRefFromV1(src.Spec.ExtensionRef) + extensionRef, err := convertExtensionRefFromV1(&src.Spec.ExtensionRef) if err != nil { return err } diff --git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go index 2856610c7..566d71dee 100644 --- a/apix/v1alpha2/inferencepool_conversion_test.go +++ b/apix/v1alpha2/inferencepool_conversion_test.go @@ -100,7 +100,7 @@ func TestInferencePoolConvertTo(t *testing.T) { }, }, TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, - ExtensionRef: &v1.Extension{ + ExtensionRef: v1.Extension{ Group: &v1Group, Kind: v1Kind, Name: "my-epp-service", @@ -235,7 +235,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { }, }, TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, - ExtensionRef: &v1.Extension{ + ExtensionRef: v1.Extension{ Group: &v1Group, Kind: v1Kind, Name: "my-epp-service", diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go index a12fed536..a47683d47 100644 --- a/pkg/epp/util/testing/wrappers.go +++ b/pkg/epp/util/testing/wrappers.go @@ -206,7 +206,7 @@ func (m *InferencePoolWrapper) TargetPorts(p int32) *InferencePoolWrapper { } func (m *InferencePoolWrapper) ExtensionRef(name string) *InferencePoolWrapper { - m.Spec.ExtensionRef = &v1.Extension{Name: v1.ObjectName(name)} + m.Spec.ExtensionRef = v1.Extension{Name: v1.ObjectName(name)} return m } From df8681c3d11f200b8c41b55459ea50e36dfcd208 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 14:07:47 -0700 Subject: [PATCH 26/38] make it back to pointer --- api/v1/inferencepool_types.go | 3 +-- api/v1/zz_generated.deepcopy.go | 6 +++++- apix/v1alpha2/inferencepool_conversion.go | 4 ++-- apix/v1alpha2/inferencepool_conversion_test.go | 4 ++-- pkg/epp/util/testing/wrappers.go | 2 +- 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index b957af2b7..c7f25c6ed 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -74,8 +74,7 @@ type InferencePoolSpec struct { TargetPorts []Port `json:"targetPorts"` // Extension configures an endpoint picker as an extension service. - // +optional - ExtensionRef Extension `json:"extensionRef,omitempty,omitzero"` + ExtensionRef *Extension `json:"extensionRef,omitempty"` } type Port struct { diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 0cdf1700a..b3af93be4 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -113,7 +113,11 @@ func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { *out = make([]Port, len(*in)) copy(*out, *in) } - in.ExtensionRef.DeepCopyInto(&out.ExtensionRef) + if in.ExtensionRef != nil { + in, out := &in.ExtensionRef, &out.ExtensionRef + *out = new(Extension) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec. diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index 08435c964..332c95a1c 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -42,7 +42,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error { dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta dst.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(src.Spec.TargetPortNumber)}} - dst.Spec.ExtensionRef = *v1Extension + dst.Spec.ExtensionRef = v1Extension dst.Status = *v1Status if src.Spec.Selector != nil { dst.Spec.Selector.MatchLabels = make(map[v1.LabelKey]v1.LabelValue, len(src.Spec.Selector)) @@ -58,7 +58,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { if src == nil { return errors.New("src cannot be nil") } - extensionRef, err := convertExtensionRefFromV1(&src.Spec.ExtensionRef) + extensionRef, err := convertExtensionRefFromV1(src.Spec.ExtensionRef) if err != nil { return err } diff --git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go index 566d71dee..2856610c7 100644 --- a/apix/v1alpha2/inferencepool_conversion_test.go +++ b/apix/v1alpha2/inferencepool_conversion_test.go @@ -100,7 +100,7 @@ func TestInferencePoolConvertTo(t *testing.T) { }, }, TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, - ExtensionRef: v1.Extension{ + ExtensionRef: &v1.Extension{ Group: &v1Group, Kind: v1Kind, Name: "my-epp-service", @@ -235,7 +235,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { }, }, TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, - ExtensionRef: v1.Extension{ + ExtensionRef: &v1.Extension{ Group: &v1Group, Kind: v1Kind, Name: "my-epp-service", diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go index a47683d47..a12fed536 100644 --- a/pkg/epp/util/testing/wrappers.go +++ b/pkg/epp/util/testing/wrappers.go @@ -206,7 +206,7 @@ func (m *InferencePoolWrapper) TargetPorts(p int32) *InferencePoolWrapper { } func (m *InferencePoolWrapper) ExtensionRef(name string) *InferencePoolWrapper { - m.Spec.ExtensionRef = v1.Extension{Name: v1.ObjectName(name)} + m.Spec.ExtensionRef = &v1.Extension{Name: v1.ObjectName(name)} return m } From 4678971f1467ad27bff9294b550293afbfd68a42 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 14:27:52 -0700 Subject: [PATCH 27/38] fixed kal linter --- api/v1/inferencepool_types.go | 9 +++++---- api/v1/zz_generated.deepcopy.go | 6 +----- apix/v1alpha2/inferencepool_conversion.go | 8 ++++---- apix/v1alpha2/inferencepool_conversion_test.go | 8 ++++---- apix/v1alpha2/inferencepool_types.go | 3 ++- apix/v1alpha2/zz_generated.deepcopy.go | 6 +----- pkg/epp/util/testing/wrappers.go | 4 ++-- 7 files changed, 19 insertions(+), 25 deletions(-) diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index c7f25c6ed..3af5006e8 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -71,18 +71,19 @@ type InferencePoolSpec struct { // +listType=map // +listMapKey=number // +required - TargetPorts []Port `json:"targetPorts"` + TargetPorts []Port `json:"targetPorts,omitempty"` // Extension configures an endpoint picker as an extension service. - ExtensionRef *Extension `json:"extensionRef,omitempty"` + // +optional + ExtensionRef Extension `json:"extensionRef,omitempty,omitzero"` } type Port struct { // Number defines the port number to access the selected model server Pods. // The number must be in the range 1 to 65535. // - // +kubebuilder:validation:Required - Number PortNumber `json:"number"` + // +required + Number PortNumber `json:"number,omitempty"` } // Extension specifies how to configure an extension that runs the endpoint picker. diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index b3af93be4..0cdf1700a 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -113,11 +113,7 @@ func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { *out = make([]Port, len(*in)) copy(*out, *in) } - if in.ExtensionRef != nil { - in, out := &in.ExtensionRef, &out.ExtensionRef - *out = new(Extension) - (*in).DeepCopyInto(*out) - } + in.ExtensionRef.DeepCopyInto(&out.ExtensionRef) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec. diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index 332c95a1c..312f77f6a 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -31,7 +31,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error { if dst == nil { return errors.New("dst cannot be nil") } - v1Extension, err := convertExtensionRefToV1(src.Spec.ExtensionRef) + v1Extension, err := convertExtensionRefToV1(&src.Spec.ExtensionRef) if err != nil { return err } @@ -42,7 +42,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error { dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta dst.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(src.Spec.TargetPortNumber)}} - dst.Spec.ExtensionRef = v1Extension + dst.Spec.ExtensionRef = *v1Extension dst.Status = *v1Status if src.Spec.Selector != nil { dst.Spec.Selector.MatchLabels = make(map[v1.LabelKey]v1.LabelValue, len(src.Spec.Selector)) @@ -58,7 +58,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { if src == nil { return errors.New("src cannot be nil") } - extensionRef, err := convertExtensionRefFromV1(src.Spec.ExtensionRef) + extensionRef, err := convertExtensionRefFromV1(&src.Spec.ExtensionRef) if err != nil { return err } @@ -69,7 +69,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta dst.Spec.TargetPortNumber = int32(src.Spec.TargetPorts[0].Number) - dst.Spec.ExtensionRef = extensionRef + dst.Spec.ExtensionRef = *extensionRef dst.Status = *status if src.Spec.Selector.MatchLabels != nil { dst.Spec.Selector = make(map[LabelKey]LabelValue, len(src.Spec.Selector.MatchLabels)) diff --git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go index 2856610c7..04305ecca 100644 --- a/apix/v1alpha2/inferencepool_conversion_test.go +++ b/apix/v1alpha2/inferencepool_conversion_test.go @@ -60,7 +60,7 @@ func TestInferencePoolConvertTo(t *testing.T) { "app": "my-model-server", }, TargetPortNumber: 8080, - ExtensionRef: &Extension{ + ExtensionRef: Extension{ Group: &group, Kind: &kind, Name: "my-epp-service", @@ -100,7 +100,7 @@ func TestInferencePoolConvertTo(t *testing.T) { }, }, TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, - ExtensionRef: &v1.Extension{ + ExtensionRef: v1.Extension{ Group: &v1Group, Kind: v1Kind, Name: "my-epp-service", @@ -235,7 +235,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { }, }, TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, - ExtensionRef: &v1.Extension{ + ExtensionRef: v1.Extension{ Group: &v1Group, Kind: v1Kind, Name: "my-epp-service", @@ -273,7 +273,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, TargetPortNumber: 8080, - ExtensionRef: &Extension{ + ExtensionRef: Extension{ Group: &group, Kind: &kind, Name: "my-epp-service", diff --git a/apix/v1alpha2/inferencepool_types.go b/apix/v1alpha2/inferencepool_types.go index 515aa0080..352724d53 100644 --- a/apix/v1alpha2/inferencepool_types.go +++ b/apix/v1alpha2/inferencepool_types.go @@ -70,7 +70,8 @@ type InferencePoolSpec struct { TargetPortNumber int32 `json:"targetPortNumber"` // Extension configures an endpoint picker as an extension service. - ExtensionRef *Extension `json:"extensionRef,omitempty"` + // +optional + ExtensionRef Extension `json:"extensionRef,omitempty"` } // Extension specifies how to configure an extension that runs the endpoint picker. diff --git a/apix/v1alpha2/zz_generated.deepcopy.go b/apix/v1alpha2/zz_generated.deepcopy.go index f8eb08185..5a71e7530 100644 --- a/apix/v1alpha2/zz_generated.deepcopy.go +++ b/apix/v1alpha2/zz_generated.deepcopy.go @@ -231,11 +231,7 @@ func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { (*out)[key] = val } } - if in.ExtensionRef != nil { - in, out := &in.ExtensionRef, &out.ExtensionRef - *out = new(Extension) - (*in).DeepCopyInto(*out) - } + in.ExtensionRef.DeepCopyInto(&out.ExtensionRef) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec. diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go index a12fed536..8a655fc7e 100644 --- a/pkg/epp/util/testing/wrappers.go +++ b/pkg/epp/util/testing/wrappers.go @@ -206,7 +206,7 @@ func (m *InferencePoolWrapper) TargetPorts(p int32) *InferencePoolWrapper { } func (m *InferencePoolWrapper) ExtensionRef(name string) *InferencePoolWrapper { - m.Spec.ExtensionRef = &v1.Extension{Name: v1.ObjectName(name)} + m.Spec.ExtensionRef = v1.Extension{Name: v1.ObjectName(name)} return m } @@ -252,7 +252,7 @@ func (m *XInferencePoolWrapper) TargetPortNumber(p int32) *XInferencePoolWrapper } func (m *XInferencePoolWrapper) ExtensionRef(name string) *XInferencePoolWrapper { - m.Spec.ExtensionRef = &v1alpha2.Extension{Name: v1alpha2.ObjectName(name)} + m.Spec.ExtensionRef = v1alpha2.Extension{Name: v1alpha2.ObjectName(name)} return m } From f93f723e7844b7aa7454f90ac26fde27c54bfa7c Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 14:31:12 -0700 Subject: [PATCH 28/38] fixed conversion --- apix/v1alpha2/inferencepool_conversion.go | 8 ++++---- apix/v1alpha2/inferencepool_conversion_test.go | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index 312f77f6a..3a072f4d3 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -82,7 +82,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { func convertStatusToV1(src *InferencePoolStatus) (*v1.InferencePoolStatus, error) { if src == nil { - return nil, nil + return nil, errors.New("src cannot be nil") } u, err := toUnstructured(src) if err != nil { @@ -93,7 +93,7 @@ func convertStatusToV1(src *InferencePoolStatus) (*v1.InferencePoolStatus, error func convertStatusFromV1(src *v1.InferencePoolStatus) (*InferencePoolStatus, error) { if src == nil { - return nil, nil + return nil, errors.New("src cannot be nil") } u, err := toUnstructured(src) if err != nil { @@ -104,7 +104,7 @@ func convertStatusFromV1(src *v1.InferencePoolStatus) (*InferencePoolStatus, err func convertExtensionRefToV1(src *Extension) (*v1.Extension, error) { if src == nil { - return nil, nil + return nil, errors.New("src cannot be nil") } u, err := toUnstructured(src) if err != nil { @@ -119,7 +119,7 @@ func convertExtensionRefToV1(src *Extension) (*v1.Extension, error) { func convertExtensionRefFromV1(src *v1.Extension) (*Extension, error) { if src == nil { - return nil, nil + return nil, errors.New("src cannot be nil") } u, err := toUnstructured(src) if err != nil { diff --git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go index 04305ecca..847372b53 100644 --- a/apix/v1alpha2/inferencepool_conversion_test.go +++ b/apix/v1alpha2/inferencepool_conversion_test.go @@ -127,7 +127,7 @@ func TestInferencePoolConvertTo(t *testing.T) { wantErr: false, }, { - name: "conversion from v1alpha2 to v1 with nil extensionRef", + name: "conversion from v1alpha2 to v1 with empty extensionRef", src: &InferencePool{ TypeMeta: metav1.TypeMeta{ Kind: "InferencePool", @@ -300,7 +300,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { wantErr: false, }, { - name: "conversion from v1 to v1alpha2 with nil extensionRef", + name: "conversion from v1 to v1alpha2 with empty extensionRef", src: &v1.InferencePool{ TypeMeta: metav1.TypeMeta{ Kind: "InferencePool", From 369ce0a8f8c2f4a43f310ca4aecd49fef3edfe70 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 14:38:25 -0700 Subject: [PATCH 29/38] revert v1a2 to use pointer --- apix/v1alpha2/inferencepool_conversion.go | 4 ++-- apix/v1alpha2/inferencepool_conversion_test.go | 4 ++-- apix/v1alpha2/inferencepool_types.go | 2 +- apix/v1alpha2/zz_generated.deepcopy.go | 6 +++++- pkg/epp/util/testing/wrappers.go | 2 +- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index 3a072f4d3..eca18b680 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -31,7 +31,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error { if dst == nil { return errors.New("dst cannot be nil") } - v1Extension, err := convertExtensionRefToV1(&src.Spec.ExtensionRef) + v1Extension, err := convertExtensionRefToV1(src.Spec.ExtensionRef) if err != nil { return err } @@ -69,7 +69,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta dst.Spec.TargetPortNumber = int32(src.Spec.TargetPorts[0].Number) - dst.Spec.ExtensionRef = *extensionRef + dst.Spec.ExtensionRef = extensionRef dst.Status = *status if src.Spec.Selector.MatchLabels != nil { dst.Spec.Selector = make(map[LabelKey]LabelValue, len(src.Spec.Selector.MatchLabels)) diff --git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go index 847372b53..d43589dd7 100644 --- a/apix/v1alpha2/inferencepool_conversion_test.go +++ b/apix/v1alpha2/inferencepool_conversion_test.go @@ -60,7 +60,7 @@ func TestInferencePoolConvertTo(t *testing.T) { "app": "my-model-server", }, TargetPortNumber: 8080, - ExtensionRef: Extension{ + ExtensionRef: &Extension{ Group: &group, Kind: &kind, Name: "my-epp-service", @@ -273,7 +273,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, TargetPortNumber: 8080, - ExtensionRef: Extension{ + ExtensionRef: &Extension{ Group: &group, Kind: &kind, Name: "my-epp-service", diff --git a/apix/v1alpha2/inferencepool_types.go b/apix/v1alpha2/inferencepool_types.go index 352724d53..53c3037c3 100644 --- a/apix/v1alpha2/inferencepool_types.go +++ b/apix/v1alpha2/inferencepool_types.go @@ -71,7 +71,7 @@ type InferencePoolSpec struct { // Extension configures an endpoint picker as an extension service. // +optional - ExtensionRef Extension `json:"extensionRef,omitempty"` + ExtensionRef *Extension `json:"extensionRef,omitempty"` } // Extension specifies how to configure an extension that runs the endpoint picker. diff --git a/apix/v1alpha2/zz_generated.deepcopy.go b/apix/v1alpha2/zz_generated.deepcopy.go index 5a71e7530..f8eb08185 100644 --- a/apix/v1alpha2/zz_generated.deepcopy.go +++ b/apix/v1alpha2/zz_generated.deepcopy.go @@ -231,7 +231,11 @@ func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { (*out)[key] = val } } - in.ExtensionRef.DeepCopyInto(&out.ExtensionRef) + if in.ExtensionRef != nil { + in, out := &in.ExtensionRef, &out.ExtensionRef + *out = new(Extension) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec. diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go index 8a655fc7e..a47683d47 100644 --- a/pkg/epp/util/testing/wrappers.go +++ b/pkg/epp/util/testing/wrappers.go @@ -252,7 +252,7 @@ func (m *XInferencePoolWrapper) TargetPortNumber(p int32) *XInferencePoolWrapper } func (m *XInferencePoolWrapper) ExtensionRef(name string) *XInferencePoolWrapper { - m.Spec.ExtensionRef = v1alpha2.Extension{Name: v1alpha2.ObjectName(name)} + m.Spec.ExtensionRef = &v1alpha2.Extension{Name: v1alpha2.ObjectName(name)} return m } From 4207d68465a6ef3bcedac3d7433b0483391915af Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 14:43:31 -0700 Subject: [PATCH 30/38] change extension to be non-nil pointer --- api/v1/inferencepool_types.go | 2 +- apix/v1alpha2/inferencepool_conversion.go | 4 ++-- apix/v1alpha2/inferencepool_conversion_test.go | 4 ++-- apix/v1alpha2/inferencepool_types.go | 4 ++-- apix/v1alpha2/zz_generated.deepcopy.go | 6 +----- .../bases/inference.networking.k8s.io_inferencepools.yaml | 1 + .../bases/inference.networking.x-k8s.io_inferencepools.yaml | 1 + pkg/epp/util/testing/wrappers.go | 2 +- 8 files changed, 11 insertions(+), 13 deletions(-) diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index 3af5006e8..e4b6e2ca7 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -74,7 +74,7 @@ type InferencePoolSpec struct { TargetPorts []Port `json:"targetPorts,omitempty"` // Extension configures an endpoint picker as an extension service. - // +optional + // +required ExtensionRef Extension `json:"extensionRef,omitempty,omitzero"` } diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index eca18b680..3a072f4d3 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -31,7 +31,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error { if dst == nil { return errors.New("dst cannot be nil") } - v1Extension, err := convertExtensionRefToV1(src.Spec.ExtensionRef) + v1Extension, err := convertExtensionRefToV1(&src.Spec.ExtensionRef) if err != nil { return err } @@ -69,7 +69,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta dst.Spec.TargetPortNumber = int32(src.Spec.TargetPorts[0].Number) - dst.Spec.ExtensionRef = extensionRef + dst.Spec.ExtensionRef = *extensionRef dst.Status = *status if src.Spec.Selector.MatchLabels != nil { dst.Spec.Selector = make(map[LabelKey]LabelValue, len(src.Spec.Selector.MatchLabels)) diff --git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go index d43589dd7..847372b53 100644 --- a/apix/v1alpha2/inferencepool_conversion_test.go +++ b/apix/v1alpha2/inferencepool_conversion_test.go @@ -60,7 +60,7 @@ func TestInferencePoolConvertTo(t *testing.T) { "app": "my-model-server", }, TargetPortNumber: 8080, - ExtensionRef: &Extension{ + ExtensionRef: Extension{ Group: &group, Kind: &kind, Name: "my-epp-service", @@ -273,7 +273,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, TargetPortNumber: 8080, - ExtensionRef: &Extension{ + ExtensionRef: Extension{ Group: &group, Kind: &kind, Name: "my-epp-service", diff --git a/apix/v1alpha2/inferencepool_types.go b/apix/v1alpha2/inferencepool_types.go index 53c3037c3..0fd879f78 100644 --- a/apix/v1alpha2/inferencepool_types.go +++ b/apix/v1alpha2/inferencepool_types.go @@ -70,8 +70,8 @@ type InferencePoolSpec struct { TargetPortNumber int32 `json:"targetPortNumber"` // Extension configures an endpoint picker as an extension service. - // +optional - ExtensionRef *Extension `json:"extensionRef,omitempty"` + // +required + ExtensionRef Extension `json:"extensionRef,omitempty"` } // Extension specifies how to configure an extension that runs the endpoint picker. diff --git a/apix/v1alpha2/zz_generated.deepcopy.go b/apix/v1alpha2/zz_generated.deepcopy.go index f8eb08185..5a71e7530 100644 --- a/apix/v1alpha2/zz_generated.deepcopy.go +++ b/apix/v1alpha2/zz_generated.deepcopy.go @@ -231,11 +231,7 @@ func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { (*out)[key] = val } } - if in.ExtensionRef != nil { - in, out := &in.ExtensionRef, &out.ExtensionRef - *out = new(Extension) - (*in).DeepCopyInto(*out) - } + in.ExtensionRef.DeepCopyInto(&out.ExtensionRef) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec. diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index ed2b16f8a..c3c64d932 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -153,6 +153,7 @@ spec: - number x-kubernetes-list-type: map required: + - extensionRef - selector - targetPorts type: object diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml index b40b1556e..138734e7e 100644 --- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml @@ -130,6 +130,7 @@ spec: minimum: 1 type: integer required: + - extensionRef - selector - targetPortNumber type: object diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go index a47683d47..8a655fc7e 100644 --- a/pkg/epp/util/testing/wrappers.go +++ b/pkg/epp/util/testing/wrappers.go @@ -252,7 +252,7 @@ func (m *XInferencePoolWrapper) TargetPortNumber(p int32) *XInferencePoolWrapper } func (m *XInferencePoolWrapper) ExtensionRef(name string) *XInferencePoolWrapper { - m.Spec.ExtensionRef = &v1alpha2.Extension{Name: v1alpha2.ObjectName(name)} + m.Spec.ExtensionRef = v1alpha2.Extension{Name: v1alpha2.ObjectName(name)} return m } From f76c994fca9deae9f142651a3de46f5f914389d1 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 15:59:40 -0700 Subject: [PATCH 31/38] make v1a2 the extension --- api/v1/inferencepool_types.go | 2 +- apix/v1alpha2/inferencepool_conversion.go | 4 ++-- apix/v1alpha2/inferencepool_conversion_test.go | 4 ++-- apix/v1alpha2/inferencepool_types.go | 2 +- apix/v1alpha2/zz_generated.deepcopy.go | 6 +++++- pkg/epp/util/testing/wrappers.go | 2 +- 6 files changed, 12 insertions(+), 8 deletions(-) diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index e4b6e2ca7..cf2c6f47a 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -75,7 +75,7 @@ type InferencePoolSpec struct { // Extension configures an endpoint picker as an extension service. // +required - ExtensionRef Extension `json:"extensionRef,omitempty,omitzero"` + ExtensionRef Extension `json:"extensionRef,omitzero"` } type Port struct { diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index 3a072f4d3..eca18b680 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -31,7 +31,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error { if dst == nil { return errors.New("dst cannot be nil") } - v1Extension, err := convertExtensionRefToV1(&src.Spec.ExtensionRef) + v1Extension, err := convertExtensionRefToV1(src.Spec.ExtensionRef) if err != nil { return err } @@ -69,7 +69,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta dst.Spec.TargetPortNumber = int32(src.Spec.TargetPorts[0].Number) - dst.Spec.ExtensionRef = *extensionRef + dst.Spec.ExtensionRef = extensionRef dst.Status = *status if src.Spec.Selector.MatchLabels != nil { dst.Spec.Selector = make(map[LabelKey]LabelValue, len(src.Spec.Selector.MatchLabels)) diff --git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go index 847372b53..d43589dd7 100644 --- a/apix/v1alpha2/inferencepool_conversion_test.go +++ b/apix/v1alpha2/inferencepool_conversion_test.go @@ -60,7 +60,7 @@ func TestInferencePoolConvertTo(t *testing.T) { "app": "my-model-server", }, TargetPortNumber: 8080, - ExtensionRef: Extension{ + ExtensionRef: &Extension{ Group: &group, Kind: &kind, Name: "my-epp-service", @@ -273,7 +273,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, TargetPortNumber: 8080, - ExtensionRef: Extension{ + ExtensionRef: &Extension{ Group: &group, Kind: &kind, Name: "my-epp-service", diff --git a/apix/v1alpha2/inferencepool_types.go b/apix/v1alpha2/inferencepool_types.go index 0fd879f78..f9747d542 100644 --- a/apix/v1alpha2/inferencepool_types.go +++ b/apix/v1alpha2/inferencepool_types.go @@ -71,7 +71,7 @@ type InferencePoolSpec struct { // Extension configures an endpoint picker as an extension service. // +required - ExtensionRef Extension `json:"extensionRef,omitempty"` + ExtensionRef *Extension `json:"extensionRef,omitempty"` } // Extension specifies how to configure an extension that runs the endpoint picker. diff --git a/apix/v1alpha2/zz_generated.deepcopy.go b/apix/v1alpha2/zz_generated.deepcopy.go index 5a71e7530..f8eb08185 100644 --- a/apix/v1alpha2/zz_generated.deepcopy.go +++ b/apix/v1alpha2/zz_generated.deepcopy.go @@ -231,7 +231,11 @@ func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { (*out)[key] = val } } - in.ExtensionRef.DeepCopyInto(&out.ExtensionRef) + if in.ExtensionRef != nil { + in, out := &in.ExtensionRef, &out.ExtensionRef + *out = new(Extension) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec. diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go index 8a655fc7e..a47683d47 100644 --- a/pkg/epp/util/testing/wrappers.go +++ b/pkg/epp/util/testing/wrappers.go @@ -252,7 +252,7 @@ func (m *XInferencePoolWrapper) TargetPortNumber(p int32) *XInferencePoolWrapper } func (m *XInferencePoolWrapper) ExtensionRef(name string) *XInferencePoolWrapper { - m.Spec.ExtensionRef = v1alpha2.Extension{Name: v1alpha2.ObjectName(name)} + m.Spec.ExtensionRef = &v1alpha2.Extension{Name: v1alpha2.ObjectName(name)} return m } From ba5ceaaf0b257bbde0e48d9f675da86d48607c10 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 16:00:56 -0700 Subject: [PATCH 32/38] added godocs --- api/v1/inferencepool_types.go | 2 ++ .../crd/bases/inference.networking.k8s.io_inferencepools.yaml | 3 +++ 2 files changed, 5 insertions(+) diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index cf2c6f47a..491dc92ca 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -66,6 +66,8 @@ type InferencePoolSpec struct { // +required Selector LabelSelector `json:"selector,omitempty,omitzero"` + // TargetPorts defines a list of ports that are exposed by this InferencePool. + // Currently, the list may only include a single port definition. // +kubebuilder:validation:MinItems=1 // +kubebuilder:validation:MaxItems=1 // +listType=map diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index c3c64d932..0cb5e0ef6 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -133,6 +133,9 @@ spec: - matchLabels type: object targetPorts: + description: |- + TargetPorts defines a list of ports that are exposed by this InferencePool. + Currently, the list may only include a single port definition. items: properties: number: From aa8b10e3c4db023993ecacd6868e3cb2b745789f Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 16:36:49 -0700 Subject: [PATCH 33/38] updated comments --- api/v1/inferencepool_types.go | 3 ++- apix/v1alpha2/inferencepool_conversion.go | 20 +++++++++---------- ...ence.networking.k8s.io_inferencepools.yaml | 3 ++- .../inferencepool_reconciler_test.go | 4 ++-- pkg/epp/util/testing/wrappers.go | 20 +++++++++---------- 5 files changed, 26 insertions(+), 24 deletions(-) diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index 491dc92ca..b4c2291de 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -76,10 +76,11 @@ type InferencePoolSpec struct { TargetPorts []Port `json:"targetPorts,omitempty"` // Extension configures an endpoint picker as an extension service. - // +required + // +optional ExtensionRef Extension `json:"extensionRef,omitzero"` } +// Port defines the network port that will be exposed by this InferencePool. type Port struct { // Number defines the port number to access the selected model server Pods. // The number must be in the range 1 to 65535. diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index eca18b680..1f32da923 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -41,8 +41,8 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error { } dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta - dst.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(src.Spec.TargetPortNumber)}} - dst.Spec.ExtensionRef = *v1Extension + dst.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(int32(src.Spec.TargetPortNumber))}} + dst.Spec.ExtensionRef = v1Extension dst.Status = *v1Status if src.Spec.Selector != nil { dst.Spec.Selector.MatchLabels = make(map[v1.LabelKey]v1.LabelValue, len(src.Spec.Selector)) @@ -82,7 +82,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { func convertStatusToV1(src *InferencePoolStatus) (*v1.InferencePoolStatus, error) { if src == nil { - return nil, errors.New("src cannot be nil") + return nil, nil } u, err := toUnstructured(src) if err != nil { @@ -93,7 +93,7 @@ func convertStatusToV1(src *InferencePoolStatus) (*v1.InferencePoolStatus, error func convertStatusFromV1(src *v1.InferencePoolStatus) (*InferencePoolStatus, error) { if src == nil { - return nil, errors.New("src cannot be nil") + return nil, nil } u, err := toUnstructured(src) if err != nil { @@ -102,24 +102,24 @@ func convertStatusFromV1(src *v1.InferencePoolStatus) (*InferencePoolStatus, err return convert[InferencePoolStatus](u) } -func convertExtensionRefToV1(src *Extension) (*v1.Extension, error) { +func convertExtensionRefToV1(src *Extension) (v1.Extension, error) { if src == nil { - return nil, errors.New("src cannot be nil") + return v1.Extension{}, nil } u, err := toUnstructured(src) if err != nil { - return nil, err + return v1.Extension{}, err } out, err := convert[v1.Extension](u) if err != nil { - return nil, err + return v1.Extension{}, err } - return out, nil + return *out, nil } func convertExtensionRefFromV1(src *v1.Extension) (*Extension, error) { if src == nil { - return nil, errors.New("src cannot be nil") + return nil, nil } u, err := toUnstructured(src) if err != nil { diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index 0cb5e0ef6..2dfd132ba 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -137,6 +137,8 @@ spec: TargetPorts defines a list of ports that are exposed by this InferencePool. Currently, the list may only include a single port definition. items: + description: Port defines the network port that will be exposed + by this InferencePool. properties: number: description: |- @@ -156,7 +158,6 @@ spec: - number x-kubernetes-list-type: map required: - - extensionRef - selector - targetPorts type: object diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index 363ec9014..48d508389 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -219,12 +219,12 @@ func TestXInferencePoolReconciler(t *testing.T) { Version: v1alpha2.GroupVersion.Version, Kind: "InferencePool", } - pool1 := utiltest.MakeXInferencePool("pool1"). + pool1 := utiltest.MakeAlphaInferencePool("pool1"). Namespace("pool1-ns"). Selector(selector_v1). ExtensionRef("epp-service"). TargetPortNumber(8080).ObjRef() - pool2 := utiltest.MakeXInferencePool("pool2"). + pool2 := utiltest.MakeAlphaInferencePool("pool2"). Namespace("pool2-ns"). ExtensionRef("epp-service"). TargetPortNumber(8080).ObjRef() diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go index a47683d47..fbffd129b 100644 --- a/pkg/epp/util/testing/wrappers.go +++ b/pkg/epp/util/testing/wrappers.go @@ -215,14 +215,14 @@ func (m *InferencePoolWrapper) ObjRef() *v1.InferencePool { return &m.InferencePool } -// XInferencePoolWrapper wraps an group "inference.networking.x-k8s.io" InferencePool. -type XInferencePoolWrapper struct { +// AlphaInferencePoolWrapper wraps an group "inference.networking.x-k8s.io" InferencePool. +type AlphaInferencePoolWrapper struct { v1alpha2.InferencePool } -// MakeXInferencePool creates a wrapper for a InferencePool. -func MakeXInferencePool(name string) *XInferencePoolWrapper { - return &XInferencePoolWrapper{ +// MakeAlphaInferencePool creates a wrapper for a InferencePool. +func MakeAlphaInferencePool(name string) *AlphaInferencePoolWrapper { + return &AlphaInferencePoolWrapper{ v1alpha2.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: name, @@ -232,12 +232,12 @@ func MakeXInferencePool(name string) *XInferencePoolWrapper { } } -func (m *XInferencePoolWrapper) Namespace(ns string) *XInferencePoolWrapper { +func (m *AlphaInferencePoolWrapper) Namespace(ns string) *AlphaInferencePoolWrapper { m.ObjectMeta.Namespace = ns return m } -func (m *XInferencePoolWrapper) Selector(selector map[string]string) *XInferencePoolWrapper { +func (m *AlphaInferencePoolWrapper) Selector(selector map[string]string) *AlphaInferencePoolWrapper { s := make(map[v1alpha2.LabelKey]v1alpha2.LabelValue) for k, v := range selector { s[v1alpha2.LabelKey(k)] = v1alpha2.LabelValue(v) @@ -246,17 +246,17 @@ func (m *XInferencePoolWrapper) Selector(selector map[string]string) *XInference return m } -func (m *XInferencePoolWrapper) TargetPortNumber(p int32) *XInferencePoolWrapper { +func (m *AlphaInferencePoolWrapper) TargetPortNumber(p int32) *AlphaInferencePoolWrapper { m.Spec.TargetPortNumber = p return m } -func (m *XInferencePoolWrapper) ExtensionRef(name string) *XInferencePoolWrapper { +func (m *AlphaInferencePoolWrapper) ExtensionRef(name string) *AlphaInferencePoolWrapper { m.Spec.ExtensionRef = &v1alpha2.Extension{Name: v1alpha2.ObjectName(name)} return m } // Obj returns the wrapped InferencePool. -func (m *XInferencePoolWrapper) ObjRef() *v1alpha2.InferencePool { +func (m *AlphaInferencePoolWrapper) ObjRef() *v1alpha2.InferencePool { return &m.InferencePool } From 49d2c4339145af50005ce888d2cabb709cf3576a Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 16:42:53 -0700 Subject: [PATCH 34/38] updated comments --- api/v1/inferencepool_types.go | 4 ++-- apix/v1alpha2/inferencepool_conversion.go | 16 ++++++++-------- apix/v1alpha2/inferencepool_conversion_test.go | 4 ++-- apix/v1alpha2/inferencepool_types.go | 2 +- apix/v1alpha2/zz_generated.deepcopy.go | 6 +----- ...ference.networking.k8s.io_inferencepools.yaml | 1 + pkg/epp/util/testing/wrappers.go | 2 +- 7 files changed, 16 insertions(+), 19 deletions(-) diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index b4c2291de..103d536a9 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -76,8 +76,8 @@ type InferencePoolSpec struct { TargetPorts []Port `json:"targetPorts,omitempty"` // Extension configures an endpoint picker as an extension service. - // +optional - ExtensionRef Extension `json:"extensionRef,omitzero"` + // +required + ExtensionRef Extension `json:"extensionRef,omitempty,omitzero"` } // Port defines the network port that will be exposed by this InferencePool. diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index 1f32da923..3e5b56f04 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -31,7 +31,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error { if dst == nil { return errors.New("dst cannot be nil") } - v1Extension, err := convertExtensionRefToV1(src.Spec.ExtensionRef) + v1Extension, err := convertExtensionRefToV1(&src.Spec.ExtensionRef) if err != nil { return err } @@ -42,7 +42,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error { dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta dst.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(int32(src.Spec.TargetPortNumber))}} - dst.Spec.ExtensionRef = v1Extension + dst.Spec.ExtensionRef = *v1Extension dst.Status = *v1Status if src.Spec.Selector != nil { dst.Spec.Selector.MatchLabels = make(map[v1.LabelKey]v1.LabelValue, len(src.Spec.Selector)) @@ -69,7 +69,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta dst.Spec.TargetPortNumber = int32(src.Spec.TargetPorts[0].Number) - dst.Spec.ExtensionRef = extensionRef + dst.Spec.ExtensionRef = *extensionRef dst.Status = *status if src.Spec.Selector.MatchLabels != nil { dst.Spec.Selector = make(map[LabelKey]LabelValue, len(src.Spec.Selector.MatchLabels)) @@ -102,19 +102,19 @@ func convertStatusFromV1(src *v1.InferencePoolStatus) (*InferencePoolStatus, err return convert[InferencePoolStatus](u) } -func convertExtensionRefToV1(src *Extension) (v1.Extension, error) { +func convertExtensionRefToV1(src *Extension) (*v1.Extension, error) { if src == nil { - return v1.Extension{}, nil + return nil, nil } u, err := toUnstructured(src) if err != nil { - return v1.Extension{}, err + return nil, err } out, err := convert[v1.Extension](u) if err != nil { - return v1.Extension{}, err + return nil, err } - return *out, nil + return out, nil } func convertExtensionRefFromV1(src *v1.Extension) (*Extension, error) { diff --git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go index d43589dd7..847372b53 100644 --- a/apix/v1alpha2/inferencepool_conversion_test.go +++ b/apix/v1alpha2/inferencepool_conversion_test.go @@ -60,7 +60,7 @@ func TestInferencePoolConvertTo(t *testing.T) { "app": "my-model-server", }, TargetPortNumber: 8080, - ExtensionRef: &Extension{ + ExtensionRef: Extension{ Group: &group, Kind: &kind, Name: "my-epp-service", @@ -273,7 +273,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, TargetPortNumber: 8080, - ExtensionRef: &Extension{ + ExtensionRef: Extension{ Group: &group, Kind: &kind, Name: "my-epp-service", diff --git a/apix/v1alpha2/inferencepool_types.go b/apix/v1alpha2/inferencepool_types.go index f9747d542..0fd879f78 100644 --- a/apix/v1alpha2/inferencepool_types.go +++ b/apix/v1alpha2/inferencepool_types.go @@ -71,7 +71,7 @@ type InferencePoolSpec struct { // Extension configures an endpoint picker as an extension service. // +required - ExtensionRef *Extension `json:"extensionRef,omitempty"` + ExtensionRef Extension `json:"extensionRef,omitempty"` } // Extension specifies how to configure an extension that runs the endpoint picker. diff --git a/apix/v1alpha2/zz_generated.deepcopy.go b/apix/v1alpha2/zz_generated.deepcopy.go index f8eb08185..5a71e7530 100644 --- a/apix/v1alpha2/zz_generated.deepcopy.go +++ b/apix/v1alpha2/zz_generated.deepcopy.go @@ -231,11 +231,7 @@ func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { (*out)[key] = val } } - if in.ExtensionRef != nil { - in, out := &in.ExtensionRef, &out.ExtensionRef - *out = new(Extension) - (*in).DeepCopyInto(*out) - } + in.ExtensionRef.DeepCopyInto(&out.ExtensionRef) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec. diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index 2dfd132ba..f4bc83dca 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -158,6 +158,7 @@ spec: - number x-kubernetes-list-type: map required: + - extensionRef - selector - targetPorts type: object diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go index fbffd129b..9eeb3ad9d 100644 --- a/pkg/epp/util/testing/wrappers.go +++ b/pkg/epp/util/testing/wrappers.go @@ -252,7 +252,7 @@ func (m *AlphaInferencePoolWrapper) TargetPortNumber(p int32) *AlphaInferencePoo } func (m *AlphaInferencePoolWrapper) ExtensionRef(name string) *AlphaInferencePoolWrapper { - m.Spec.ExtensionRef = &v1alpha2.Extension{Name: v1alpha2.ObjectName(name)} + m.Spec.ExtensionRef = v1alpha2.Extension{Name: v1alpha2.ObjectName(name)} return m } From 0896625323eb7c94eda2a5ba9a137f28b8eadac5 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 17:00:58 -0700 Subject: [PATCH 35/38] updated conversion --- apix/v1alpha2/inferencepool_conversion.go | 28 +++++++++-------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index 3e5b56f04..1bb2c19eb 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -42,7 +42,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error { dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta dst.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(int32(src.Spec.TargetPortNumber))}} - dst.Spec.ExtensionRef = *v1Extension + dst.Spec.ExtensionRef = v1Extension dst.Status = *v1Status if src.Spec.Selector != nil { dst.Spec.Selector.MatchLabels = make(map[v1.LabelKey]v1.LabelValue, len(src.Spec.Selector)) @@ -69,7 +69,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta dst.Spec.TargetPortNumber = int32(src.Spec.TargetPorts[0].Number) - dst.Spec.ExtensionRef = *extensionRef + dst.Spec.ExtensionRef = extensionRef dst.Status = *status if src.Spec.Selector.MatchLabels != nil { dst.Spec.Selector = make(map[LabelKey]LabelValue, len(src.Spec.Selector.MatchLabels)) @@ -102,34 +102,28 @@ func convertStatusFromV1(src *v1.InferencePoolStatus) (*InferencePoolStatus, err return convert[InferencePoolStatus](u) } -func convertExtensionRefToV1(src *Extension) (*v1.Extension, error) { - if src == nil { - return nil, nil - } +func convertExtensionRefToV1(src *Extension) (v1.Extension, error) { u, err := toUnstructured(src) if err != nil { - return nil, err + return v1.Extension{}, err } out, err := convert[v1.Extension](u) if err != nil { - return nil, err + return v1.Extension{}, err } - return out, nil + return *out, nil } -func convertExtensionRefFromV1(src *v1.Extension) (*Extension, error) { - if src == nil { - return nil, nil - } - u, err := toUnstructured(src) +func convertExtensionRefFromV1(src *v1.Extension) (Extension, error) { + u, err := toUnstructured(&src) if err != nil { - return nil, err + return Extension{}, err } extension, err := convert[Extension](u) if err != nil { - return nil, err + return Extension{}, err } - return extension, nil + return *extension, nil } func toUnstructured(obj any) (*unstructured.Unstructured, error) { From d8a7dff37f51fee41f1e57dd592474f81237a1e4 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 17:05:38 -0700 Subject: [PATCH 36/38] added nil check --- apix/v1alpha2/inferencepool_conversion.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index 1bb2c19eb..eb5939b27 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -103,6 +103,9 @@ func convertStatusFromV1(src *v1.InferencePoolStatus) (*InferencePoolStatus, err } func convertExtensionRefToV1(src *Extension) (v1.Extension, error) { + if src == nil { + return v1.Extension{}, errors.New("src cannot be nil") + } u, err := toUnstructured(src) if err != nil { return v1.Extension{}, err @@ -115,6 +118,9 @@ func convertExtensionRefToV1(src *Extension) (v1.Extension, error) { } func convertExtensionRefFromV1(src *v1.Extension) (Extension, error) { + if src == nil { + return Extension{}, errors.New("src cannot be nil") + } u, err := toUnstructured(&src) if err != nil { return Extension{}, err From 36401285786228992bd5a344d0e4bce2831d285c Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 17:06:32 -0700 Subject: [PATCH 37/38] added nil check --- apix/v1alpha2/inferencepool_conversion.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index eb5939b27..58e696e4d 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -82,7 +82,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { func convertStatusToV1(src *InferencePoolStatus) (*v1.InferencePoolStatus, error) { if src == nil { - return nil, nil + return nil, errors.New("src cannot be nil") } u, err := toUnstructured(src) if err != nil { @@ -93,7 +93,7 @@ func convertStatusToV1(src *InferencePoolStatus) (*v1.InferencePoolStatus, error func convertStatusFromV1(src *v1.InferencePoolStatus) (*InferencePoolStatus, error) { if src == nil { - return nil, nil + return nil, errors.New("src cannot be nil") } u, err := toUnstructured(src) if err != nil { From c352baedcfb7a626ab7754b8eb3cd28fb92696c6 Mon Sep 17 00:00:00 2001 From: Xiyue Yu Date: Thu, 14 Aug 2025 18:32:29 -0700 Subject: [PATCH 38/38] fixed linter --- apix/v1alpha2/inferencepool_conversion.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index 58e696e4d..aa29d8f69 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -41,7 +41,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error { } dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta - dst.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(int32(src.Spec.TargetPortNumber))}} + dst.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(src.Spec.TargetPortNumber)}} dst.Spec.ExtensionRef = v1Extension dst.Status = *v1Status if src.Spec.Selector != nil {