diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go index b1ce91e56..103d536a9 100644 --- a/api/v1/inferencepool_types.go +++ b/api/v1/inferencepool_types.go @@ -66,19 +66,29 @@ type InferencePoolSpec struct { // +required Selector LabelSelector `json:"selector,omitempty,omitzero"` - // TargetPortNumber defines the port number to access the selected model server Pods. - // The number must be in the range 1 to 65535. - // - // +kubebuilder:validation:Minimum=1 - // +kubebuilder:validation:Maximum=65535 + // TargetPorts defines a list of ports that are exposed by this InferencePool. + // Currently, the list may only include a single port definition. + // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:MaxItems=1 + // +listType=map + // +listMapKey=number // +required - TargetPortNumber int32 `json:"targetPortNumber,omitempty"` + TargetPorts []Port `json:"targetPorts,omitempty"` // Extension configures an endpoint picker as an extension service. - // +optional + // +required ExtensionRef Extension `json:"extensionRef,omitempty,omitzero"` } +// Port defines the network port that will be exposed by this InferencePool. +type Port struct { + // Number defines the port number to access the selected model server Pods. + // The number must be in the range 1 to 65535. + // + // +required + Number PortNumber `json:"number,omitempty"` +} + // Extension specifies how to configure an extension that runs the endpoint picker. type Extension struct { // Group is the group of the referent. 
diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 11fa77fdb..0cdf1700a 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -108,6 +108,11 @@ func (in *InferencePoolList) DeepCopyObject() runtime.Object { func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { *out = *in in.Selector.DeepCopyInto(&out.Selector) + if in.TargetPorts != nil { + in, out := &in.TargetPorts, &out.TargetPorts + *out = make([]Port, len(*in)) + copy(*out, *in) + } in.ExtensionRef.DeepCopyInto(&out.ExtensionRef) } @@ -207,3 +212,18 @@ func (in *PoolStatus) DeepCopy() *PoolStatus { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Port) DeepCopyInto(out *Port) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Port. +func (in *Port) DeepCopy() *Port { + if in == nil { + return nil + } + out := new(Port) + in.DeepCopyInto(out) + return out +} diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go index 8c02093cc..aa29d8f69 100644 --- a/apix/v1alpha2/inferencepool_conversion.go +++ b/apix/v1alpha2/inferencepool_conversion.go @@ -31,7 +31,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error { if dst == nil { return errors.New("dst cannot be nil") } - v1Extension, err := convertExtensionRefToV1(src.Spec.ExtensionRef) + v1Extension, err := convertExtensionRefToV1(&src.Spec.ExtensionRef) if err != nil { return err } @@ -41,7 +41,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error { } dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta - dst.Spec.TargetPortNumber = src.Spec.TargetPortNumber + dst.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(src.Spec.TargetPortNumber)}} dst.Spec.ExtensionRef = v1Extension dst.Status = *v1Status if src.Spec.Selector != nil { @@ 
-68,7 +68,11 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { } + // v1alpha2 can express exactly one target port; reject any other count instead of panicking on TargetPorts[0]. + if len(src.Spec.TargetPorts) != 1 { + return errors.New("src must have exactly one target port") + } dst.TypeMeta = src.TypeMeta dst.ObjectMeta = src.ObjectMeta - dst.Spec.TargetPortNumber = src.Spec.TargetPortNumber + dst.Spec.TargetPortNumber = int32(src.Spec.TargetPorts[0].Number) dst.Spec.ExtensionRef = extensionRef dst.Status = *status if src.Spec.Selector.MatchLabels != nil { @@ -82,7 +86,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error { func convertStatusToV1(src *InferencePoolStatus) (*v1.InferencePoolStatus, error) { if src == nil { - return nil, nil + return nil, errors.New("src cannot be nil") } u, err := toUnstructured(src) if err != nil { @@ -93,7 +97,7 @@ func convertStatusToV1(src *InferencePoolStatus) (*v1.InferencePoolStatus, error func convertStatusFromV1(src *v1.InferencePoolStatus) (*InferencePoolStatus, error) { if src == nil { - return nil, nil + return nil, errors.New("src cannot be nil") } u, err := toUnstructured(src) if err != nil { @@ -104,7 +108,7 @@ func convertStatusFromV1(src *v1.InferencePoolStatus) (*InferencePoolStatus, err func convertExtensionRefToV1(src *Extension) (v1.Extension, error) { if src == nil { - return v1.Extension{}, nil + return v1.Extension{}, errors.New("src cannot be nil") } u, err := toUnstructured(src) if err != nil { @@ -117,19 +121,19 @@ func convertExtensionRefToV1(src *Extension) (v1.Extension, error) { return *out, nil } -func convertExtensionRefFromV1(src *v1.Extension) (*Extension, error) { +func convertExtensionRefFromV1(src *v1.Extension) (Extension, error) { if src == nil { - return nil, nil + return Extension{}, errors.New("src cannot be nil") } u, err := toUnstructured(src) if err != nil { - return nil, err + return Extension{}, err } extension, err := convert[Extension](u) if err != nil { - return nil, err + return Extension{}, err } - return extension, nil + return *extension, nil } func toUnstructured(obj any) (*unstructured.Unstructured, error) { diff 
--git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go index 5c79861c4..847372b53 100644 --- a/apix/v1alpha2/inferencepool_conversion_test.go +++ b/apix/v1alpha2/inferencepool_conversion_test.go @@ -60,7 +60,7 @@ func TestInferencePoolConvertTo(t *testing.T) { "app": "my-model-server", }, TargetPortNumber: 8080, - ExtensionRef: &Extension{ + ExtensionRef: Extension{ Group: &group, Kind: &kind, Name: "my-epp-service", @@ -99,7 +99,7 @@ func TestInferencePoolConvertTo(t *testing.T) { "app": "my-model-server", }, }, - TargetPortNumber: 8080, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, ExtensionRef: v1.Extension{ Group: &v1Group, Kind: v1Kind, @@ -127,7 +127,7 @@ func TestInferencePoolConvertTo(t *testing.T) { wantErr: false, }, { - name: "conversion from v1alpha2 to v1 with nil extensionRef", + name: "conversion from v1alpha2 to v1 with empty extensionRef", src: &InferencePool{ TypeMeta: metav1.TypeMeta{ Kind: "InferencePool", @@ -174,7 +174,7 @@ func TestInferencePoolConvertTo(t *testing.T) { "app": "my-model-server", }, }, - TargetPortNumber: 8080, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, }, Status: v1.InferencePoolStatus{ Parents: []v1.PoolStatus{ @@ -234,7 +234,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, }, - TargetPortNumber: 8080, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, ExtensionRef: v1.Extension{ Group: &v1Group, Kind: v1Kind, @@ -273,7 +273,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, TargetPortNumber: 8080, - ExtensionRef: &Extension{ + ExtensionRef: Extension{ Group: &group, Kind: &kind, Name: "my-epp-service", @@ -300,7 +300,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { wantErr: false, }, { - name: "conversion from v1 to v1alpha2 with nil extensionRef", + name: "conversion from v1 to v1alpha2 with empty extensionRef", src: &v1.InferencePool{ TypeMeta: 
metav1.TypeMeta{ Kind: "InferencePool", @@ -316,7 +316,7 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, }, - TargetPortNumber: 8080, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}}, }, Status: v1.InferencePoolStatus{ Parents: []v1.PoolStatus{ @@ -348,7 +348,6 @@ func TestInferencePoolConvertFrom(t *testing.T) { "app": "my-model-server", }, TargetPortNumber: 8080, - ExtensionRef: &Extension{}, }, Status: InferencePoolStatus{ Parents: []PoolStatus{ diff --git a/apix/v1alpha2/inferencepool_types.go b/apix/v1alpha2/inferencepool_types.go index 515aa0080..0fd879f78 100644 --- a/apix/v1alpha2/inferencepool_types.go +++ b/apix/v1alpha2/inferencepool_types.go @@ -70,7 +70,8 @@ type InferencePoolSpec struct { TargetPortNumber int32 `json:"targetPortNumber"` // Extension configures an endpoint picker as an extension service. - ExtensionRef *Extension `json:"extensionRef,omitempty"` + // +required + ExtensionRef Extension `json:"extensionRef,omitempty"` } // Extension specifies how to configure an extension that runs the endpoint picker. diff --git a/apix/v1alpha2/zz_generated.deepcopy.go b/apix/v1alpha2/zz_generated.deepcopy.go index f8eb08185..5a71e7530 100644 --- a/apix/v1alpha2/zz_generated.deepcopy.go +++ b/apix/v1alpha2/zz_generated.deepcopy.go @@ -231,11 +231,7 @@ func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) { (*out)[key] = val } } - if in.ExtensionRef != nil { - in, out := &in.ExtensionRef, &out.ExtensionRef - *out = new(Extension) - (*in).DeepCopyInto(*out) - } + in.ExtensionRef.DeepCopyInto(&out.ExtensionRef) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec. 
diff --git a/client-go/applyconfiguration/api/v1/inferencepoolspec.go b/client-go/applyconfiguration/api/v1/inferencepoolspec.go index 8d14a3969..364d059dd 100644 --- a/client-go/applyconfiguration/api/v1/inferencepoolspec.go +++ b/client-go/applyconfiguration/api/v1/inferencepoolspec.go @@ -21,9 +21,9 @@ package v1 // InferencePoolSpecApplyConfiguration represents a declarative configuration of the InferencePoolSpec type for use // with apply. type InferencePoolSpecApplyConfiguration struct { - Selector *LabelSelectorApplyConfiguration `json:"selector,omitempty"` - TargetPortNumber *int32 `json:"targetPortNumber,omitempty"` - ExtensionRef *ExtensionApplyConfiguration `json:"extensionRef,omitempty"` + Selector *LabelSelectorApplyConfiguration `json:"selector,omitempty"` + TargetPorts []PortApplyConfiguration `json:"targetPorts,omitempty"` + ExtensionRef *ExtensionApplyConfiguration `json:"extensionRef,omitempty"` } // InferencePoolSpecApplyConfiguration constructs a declarative configuration of the InferencePoolSpec type for use with @@ -40,11 +40,16 @@ func (b *InferencePoolSpecApplyConfiguration) WithSelector(value *LabelSelectorA return b } -// WithTargetPortNumber sets the TargetPortNumber field in the declarative configuration to the given value -// and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the TargetPortNumber field is set to the value of the last call. -func (b *InferencePoolSpecApplyConfiguration) WithTargetPortNumber(value int32) *InferencePoolSpecApplyConfiguration { - b.TargetPortNumber = &value +// WithTargetPorts adds the given value to the TargetPorts field in the declarative configuration +// and returns the receiver, so that objects can be build by chaining "With" function invocations. +// If called multiple times, values provided by each call will be appended to the TargetPorts field. 
+func (b *InferencePoolSpecApplyConfiguration) WithTargetPorts(values ...*PortApplyConfiguration) *InferencePoolSpecApplyConfiguration { + for i := range values { + if values[i] == nil { + panic("nil value passed to WithTargetPorts") + } + b.TargetPorts = append(b.TargetPorts, *values[i]) + } return b } diff --git a/client-go/applyconfiguration/api/v1/port.go b/client-go/applyconfiguration/api/v1/port.go new file mode 100644 index 000000000..6067a5d38 --- /dev/null +++ b/client-go/applyconfiguration/api/v1/port.go @@ -0,0 +1,43 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1 + +import ( + apiv1 "sigs.k8s.io/gateway-api-inference-extension/api/v1" +) + +// PortApplyConfiguration represents a declarative configuration of the Port type for use +// with apply. +type PortApplyConfiguration struct { + Number *apiv1.PortNumber `json:"number,omitempty"` +} + +// PortApplyConfiguration constructs a declarative configuration of the Port type for use with +// apply. +func Port() *PortApplyConfiguration { + return &PortApplyConfiguration{} +} + +// WithNumber sets the Number field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Number field is set to the value of the last call. 
+func (b *PortApplyConfiguration) WithNumber(value apiv1.PortNumber) *PortApplyConfiguration { + b.Number = &value + return b +} diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go index f208bf1d9..c5d4f575e 100644 --- a/client-go/applyconfiguration/utils.go +++ b/client-go/applyconfiguration/utils.go @@ -48,6 +48,8 @@ func ForKind(kind schema.GroupVersionKind) interface{} { return &apiv1.ParentGatewayReferenceApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("PoolStatus"): return &apiv1.PoolStatusApplyConfiguration{} + case v1.SchemeGroupVersion.WithKind("Port"): + return &apiv1.PortApplyConfiguration{} // Group=inference.networking.x-k8s.io, Version=v1alpha2 case v1alpha2.SchemeGroupVersion.WithKind("Extension"): diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index c614b2146..6b14d53be 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -148,7 +148,7 @@ var ( "The configuration specified as text, in lieu of a file") modelServerMetricsPort = flag.Int("model-server-metrics-port", 0, "Port to scrape metrics from pods. 
"+ - "Default value will be set to InferencePool.Spec.TargetPortNumber if not set.") + "Default value will be set to the InferencePool.Spec.TargetPorts[0].Number if not set.") modelServerMetricsPath = flag.String("model-server-metrics-path", "/metrics", "Path to scrape metrics from pods") modelServerMetricsScheme = flag.String("model-server-metrics-scheme", "http", "Scheme to scrape metrics from pods") modelServerMetricsHttpsInsecureSkipVerify = flag.Bool("model-server-metrics-https-insecure-skip-verify", true, "When using 'https' scheme for 'model-server-metrics-scheme', configure 'InsecureSkipVerify' (default to true)") diff --git a/config/charts/inferencepool/templates/inferencepool.yaml b/config/charts/inferencepool/templates/inferencepool.yaml index ecddbfaa7..ad13f2d43 100644 --- a/config/charts/inferencepool/templates/inferencepool.yaml +++ b/config/charts/inferencepool/templates/inferencepool.yaml @@ -7,7 +7,10 @@ metadata: labels: {{- include "gateway-api-inference-extension.labels" . 
| nindent 4 }} spec: - targetPortNumber: {{ .Values.inferencePool.targetPortNumber }} + targetPorts: + {{- range .Values.inferencePool.targetPorts }} + - number: {{ .number }} + {{- end }} selector: matchLabels: {{- if .Values.inferencePool.modelServers.matchLabels }} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 95eb07731..5f0c2fd04 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -39,11 +39,12 @@ inferenceExtension: enableLeaderElection: false inferencePool: - targetPortNumber: 8000 + targetPorts: + - number: 8000 modelServerType: vllm # vllm, triton-tensorrt-llm - # modelServers: # REQUIRED - # matchLabels: - # app: vllm-llama3-8b-instruct + modelServers: # REQUIRED + matchLabels: + app: vllm-llama3-8b-instruct provider: name: none diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml index 99fd2a97a..f4bc83dca 100644 --- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml @@ -132,17 +132,35 @@ spec: required: - matchLabels type: object - targetPortNumber: + targetPorts: description: |- - TargetPortNumber defines the port number to access the selected model server Pods. - The number must be in the range 1 to 65535. - format: int32 - maximum: 65535 - minimum: 1 - type: integer + TargetPorts defines a list of ports that are exposed by this InferencePool. + Currently, the list may only include a single port definition. + items: + description: Port defines the network port that will be exposed + by this InferencePool. + properties: + number: + description: |- + Number defines the port number to access the selected model server Pods. + The number must be in the range 1 to 65535. 
+ format: int32 + maximum: 65535 + minimum: 1 + type: integer + required: + - number + type: object + maxItems: 1 + minItems: 1 + type: array + x-kubernetes-list-map-keys: + - number + x-kubernetes-list-type: map required: + - extensionRef - selector - - targetPortNumber + - targetPorts type: object status: default: diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml index b40b1556e..138734e7e 100644 --- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml +++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml @@ -130,6 +130,7 @@ spec: minimum: 1 type: integer required: + - extensionRef - selector - targetPortNumber type: object diff --git a/config/manifests/inferencepool-resources.yaml b/config/manifests/inferencepool-resources.yaml index 33a7b8e74..a3eba5582 100644 --- a/config/manifests/inferencepool-resources.yaml +++ b/config/manifests/inferencepool-resources.yaml @@ -8,7 +8,8 @@ kind: InferencePool metadata: name: vllm-llama3-8b-instruct spec: - targetPortNumber: 8000 + targetPorts: + - number: 8000 selector: matchLabels: app: vllm-llama3-8b-instruct diff --git a/conformance/resources/base.yaml b/conformance/resources/base.yaml index e3d7561bb..5e00a6971 100644 --- a/conformance/resources/base.yaml +++ b/conformance/resources/base.yaml @@ -155,7 +155,8 @@ spec: selector: matchLabels: app: primary-inference-model-server - targetPortNumber: 3000 + targetPorts: + - number: 3000 extensionRef: name: primary-endpoint-picker-svc --- @@ -248,8 +249,9 @@ metadata: spec: selector: matchLabels: app: secondary-inference-model-server - targetPortNumber: 3000 + targetPorts: + - number: 3000 extensionRef: name: secondary-endpoint-picker-svc failureMode: FailOpen diff --git a/pkg/epp/backend/metrics/pod_metrics.go b/pkg/epp/backend/metrics/pod_metrics.go index eaa77d5c9..da66a97ed 100644 --- 
a/pkg/epp/backend/metrics/pod_metrics.go +++ b/pkg/epp/backend/metrics/pod_metrics.go @@ -117,7 +117,10 @@ func (pm *podMetrics) refreshMetrics() error { } ctx, cancel := context.WithTimeout(context.Background(), fetchMetricsTimeout) defer cancel() - updated, err := pm.pmc.FetchMetrics(ctx, pm.GetPod(), pm.GetMetrics(), pool.Spec.TargetPortNumber) + if len(pool.Spec.TargetPorts) != 1 { + return fmt.Errorf("expected 1 target port, got %d", len(pool.Spec.TargetPorts)) + } + updated, err := pm.pmc.FetchMetrics(ctx, pm.GetPod(), pm.GetMetrics(), int32(pool.Spec.TargetPorts[0].Number)) if err != nil { pm.logger.V(logutil.TRACE).Info("Failed to refreshed metrics:", "err", err) } diff --git a/pkg/epp/backend/metrics/pod_metrics_test.go b/pkg/epp/backend/metrics/pod_metrics_test.go index d3d8e2b45..9a0e1a6fc 100644 --- a/pkg/epp/backend/metrics/pod_metrics_test.go +++ b/pkg/epp/backend/metrics/pod_metrics_test.go @@ -88,7 +88,7 @@ func TestMetricsRefresh(t *testing.T) { type fakeDataStore struct{} func (f *fakeDataStore) PoolGet() (*v1.InferencePool, error) { - return &v1.InferencePool{Spec: v1.InferencePoolSpec{TargetPortNumber: 8000}}, nil + return &v1.InferencePool{Spec: v1.InferencePoolSpec{TargetPorts: []v1.Port{{Number: 8000}}}}, nil } func (f *fakeDataStore) PodList(func(PodMetrics) bool) []PodMetrics { diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go index 8a6fcc4e2..48d508389 100644 --- a/pkg/epp/controller/inferencepool_reconciler_test.go +++ b/pkg/epp/controller/inferencepool_reconciler_test.go @@ -80,8 +80,8 @@ func TestInferencePoolReconciler(t *testing.T) { pool1 := utiltest.MakeInferencePool("pool1"). Namespace("pool1-ns"). Selector(selector_v1). - ExtensionRef("epp-service"). - TargetPortNumber(8080).ObjRef() + TargetPorts(8080). 
+ ExtensionRef("epp-service").ObjRef() pool1.SetGroupVersionKind(gvk) pool2 := utiltest.MakeInferencePool("pool2").Namespace("pool2-ns").ExtensionRef("epp-service").ObjRef() pool2.SetGroupVersionKind(gvk) @@ -146,7 +146,7 @@ func TestInferencePoolReconciler(t *testing.T) { if err := fakeClient.Get(ctx, req.NamespacedName, newPool1); err != nil { t.Errorf("Unexpected pool get error: %v", err) } - newPool1.Spec.TargetPortNumber = 9090 + newPool1.Spec.TargetPorts = []v1.Port{{Number: 9090}} if err := fakeClient.Update(ctx, newPool1, &client.UpdateOptions{}); err != nil { t.Errorf("Unexpected pool update error: %v", err) } @@ -219,12 +219,15 @@ func TestXInferencePoolReconciler(t *testing.T) { Version: v1alpha2.GroupVersion.Version, Kind: "InferencePool", } - pool1 := utiltest.MakeXInferencePool("pool1"). + pool1 := utiltest.MakeAlphaInferencePool("pool1"). Namespace("pool1-ns"). Selector(selector_v1). ExtensionRef("epp-service"). TargetPortNumber(8080).ObjRef() - pool2 := utiltest.MakeXInferencePool("pool2").Namespace("pool2-ns").ExtensionRef("epp-service").ObjRef() + pool2 := utiltest.MakeAlphaInferencePool("pool2"). + Namespace("pool2-ns"). + ExtensionRef("epp-service"). 
+ TargetPortNumber(8080).ObjRef() pool1.SetGroupVersionKind(gvk) pool2.SetGroupVersionKind(gvk) @@ -323,6 +326,7 @@ func xDiffStore(t *testing.T, datastore datastore.Datastore, params xDiffStorePa if gotPool == nil && params.wantPool == nil { return "" } + gotXPool := &v1alpha2.InferencePool{} err := gotXPool.ConvertFrom(gotPool) diff --git a/pkg/epp/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go index d1dae0f05..5ceb3efdb 100644 --- a/pkg/epp/controller/pod_reconciler_test.go +++ b/pkg/epp/controller/pod_reconciler_test.go @@ -61,7 +61,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPortNumber: int32(8000), + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -79,7 +79,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPortNumber: int32(8000), + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -97,7 +97,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPortNumber: int32(8000), + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -116,7 +116,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPortNumber: int32(8000), + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -132,7 +132,7 @@ func 
TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPortNumber: int32(8000), + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -149,7 +149,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPortNumber: int32(8000), + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", @@ -167,7 +167,7 @@ func TestPodReconciler(t *testing.T) { existingPods: []*corev1.Pod{basePod1, basePod2}, pool: &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPortNumber: int32(8000), + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "some-key": "some-val", diff --git a/pkg/epp/datastore/datastore_test.go b/pkg/epp/datastore/datastore_test.go index 77fe7c7c0..271c31ee7 100644 --- a/pkg/epp/datastore/datastore_test.go +++ b/pkg/epp/datastore/datastore_test.go @@ -245,7 +245,7 @@ var ( pod2NamespacedName = types.NamespacedName{Name: pod2.Name, Namespace: pod2.Namespace} inferencePool = &v1.InferencePool{ Spec: v1.InferencePoolSpec{ - TargetPortNumber: 8000, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, }, } ) diff --git a/pkg/epp/handlers/request.go b/pkg/epp/handlers/request.go index d051e163d..7f8122195 100644 --- a/pkg/epp/handlers/request.go +++ b/pkg/epp/handlers/request.go @@ -17,6 +17,7 @@ limitations under the License. 
package handlers import ( + "fmt" "strconv" "time" @@ -45,7 +46,10 @@ func (s *StreamingServer) HandleRequestHeaders(reqCtx *RequestContext, req *extP if err != nil { return err } - reqCtx.TargetEndpoint = pod.Address + ":" + strconv.Itoa(int(pool.Spec.TargetPortNumber)) + if len(pool.Spec.TargetPorts) != 1 { + return fmt.Errorf("expected 1 target port, got %d", len(pool.Spec.TargetPorts)) + } + reqCtx.TargetEndpoint = pod.Address + ":" + strconv.Itoa(int(pool.Spec.TargetPorts[0].Number)) reqCtx.RequestSize = 0 reqCtx.reqHeaderResp = s.generateRequestHeaderResponse(reqCtx) return nil diff --git a/pkg/epp/metrics/collectors/inference_pool_test.go b/pkg/epp/metrics/collectors/inference_pool_test.go index e855c55c1..dcac3b37d 100644 --- a/pkg/epp/metrics/collectors/inference_pool_test.go +++ b/pkg/epp/metrics/collectors/inference_pool_test.go @@ -80,7 +80,7 @@ func TestMetricsCollected(t *testing.T) { Name: "test-pool", }, Spec: v1.InferencePoolSpec{ - TargetPortNumber: 8000, + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, }, } _ = ds.PoolSet(context.Background(), fakeClient, inferencePool) diff --git a/pkg/epp/requestcontrol/director.go b/pkg/epp/requestcontrol/director.go index c40b5864d..1435c0154 100644 --- a/pkg/epp/requestcontrol/director.go +++ b/pkg/epp/requestcontrol/director.go @@ -240,7 +240,10 @@ func (d *Director) prepareRequest(ctx context.Context, reqCtx *handlers.RequestC return reqCtx, err } targetPods := []*backend.Pod{} - targetPort := int(pool.Spec.TargetPortNumber) + if len(pool.Spec.TargetPorts) != 1 { + return reqCtx, errutil.Error{Code: errutil.BadRequest, Msg: "targetPorts should have length 1"} + } + targetPort := int(pool.Spec.TargetPorts[0].Number) targetEndpoints := []string{} for _, pod := range result.ProfileResults[result.PrimaryProfileName].TargetPods { diff --git a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go index c25b9640d..e7968d4d9 100644 --- 
a/pkg/epp/requestcontrol/director_test.go +++ b/pkg/epp/requestcontrol/director_test.go @@ -101,7 +101,7 @@ func TestDirector_HandleRequest(t *testing.T) { pool := &v1.InferencePool{ ObjectMeta: metav1.ObjectMeta{Name: "test-pool", Namespace: "default"}, Spec: v1.InferencePoolSpec{ - TargetPortNumber: int32(8000), + TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}}, Selector: v1.LabelSelector{ MatchLabels: map[v1.LabelKey]v1.LabelValue{ "app": "inference", diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go index c637713f8..9eeb3ad9d 100644 --- a/pkg/epp/util/testing/wrappers.go +++ b/pkg/epp/util/testing/wrappers.go @@ -200,8 +200,8 @@ func (m *InferencePoolWrapper) Selector(selector map[string]string) *InferencePo return m } -func (m *InferencePoolWrapper) TargetPortNumber(p int32) *InferencePoolWrapper { - m.Spec.TargetPortNumber = p +func (m *InferencePoolWrapper) TargetPorts(p int32) *InferencePoolWrapper { + m.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(p)}} return m } @@ -215,14 +215,14 @@ func (m *InferencePoolWrapper) ObjRef() *v1.InferencePool { return &m.InferencePool } -// XInferencePoolWrapper wraps an group "inference.networking.x-k8s.io" InferencePool. -type XInferencePoolWrapper struct { +// AlphaInferencePoolWrapper wraps a group "inference.networking.x-k8s.io" InferencePool. +type AlphaInferencePoolWrapper struct { v1alpha2.InferencePool } -// MakeXInferencePool creates a wrapper for a InferencePool. -func MakeXInferencePool(name string) *XInferencePoolWrapper { - return &XInferencePoolWrapper{ +// MakeAlphaInferencePool creates a wrapper for an InferencePool. 
+func MakeAlphaInferencePool(name string) *AlphaInferencePoolWrapper { + return &AlphaInferencePoolWrapper{ v1alpha2.InferencePool{ ObjectMeta: metav1.ObjectMeta{ Name: name, @@ -232,12 +232,12 @@ func MakeXInferencePool(name string) *XInferencePoolWrapper { } } -func (m *XInferencePoolWrapper) Namespace(ns string) *XInferencePoolWrapper { +func (m *AlphaInferencePoolWrapper) Namespace(ns string) *AlphaInferencePoolWrapper { m.ObjectMeta.Namespace = ns return m } -func (m *XInferencePoolWrapper) Selector(selector map[string]string) *XInferencePoolWrapper { +func (m *AlphaInferencePoolWrapper) Selector(selector map[string]string) *AlphaInferencePoolWrapper { s := make(map[v1alpha2.LabelKey]v1alpha2.LabelValue) for k, v := range selector { s[v1alpha2.LabelKey(k)] = v1alpha2.LabelValue(v) @@ -246,17 +246,17 @@ func (m *XInferencePoolWrapper) Selector(selector map[string]string) *XInference return m } -func (m *XInferencePoolWrapper) TargetPortNumber(p int32) *XInferencePoolWrapper { +func (m *AlphaInferencePoolWrapper) TargetPortNumber(p int32) *AlphaInferencePoolWrapper { m.Spec.TargetPortNumber = p return m } -func (m *XInferencePoolWrapper) ExtensionRef(name string) *XInferencePoolWrapper { - m.Spec.ExtensionRef = &v1alpha2.Extension{Name: v1alpha2.ObjectName(name)} +func (m *AlphaInferencePoolWrapper) ExtensionRef(name string) *AlphaInferencePoolWrapper { + m.Spec.ExtensionRef = v1alpha2.Extension{Name: v1alpha2.ObjectName(name)} return m } // Obj returns the wrapped InferencePool. 
-func (m *XInferencePoolWrapper) ObjRef() *v1alpha2.InferencePool { +func (m *AlphaInferencePoolWrapper) ObjRef() *v1alpha2.InferencePool { return &m.InferencePool } diff --git a/site-src/api-types/inferencepool.md b/site-src/api-types/inferencepool.md index d2794478b..c4481b1ad 100644 --- a/site-src/api-types/inferencepool.md +++ b/site-src/api-types/inferencepool.md @@ -33,7 +33,8 @@ kind: InferencePool metadata: name: vllm-llama3-8b-instruct spec: - targetPortNumber: 8000 + targetPorts: + - number: 8000 selector: app: vllm-llama3-8b-instruct extensionRef: diff --git a/site-src/guides/implementers.md b/site-src/guides/implementers.md index 747e934a2..6fce01657 100644 --- a/site-src/guides/implementers.md +++ b/site-src/guides/implementers.md @@ -47,7 +47,8 @@ kind: InferencePool metadata: name: vllm-llama3-8b-instruct spec: - targetPortNumber: 8000 + targetPorts: + - number: 8000 selector: app: vllm-llama3-8b-instruct extensionRef: diff --git a/site-src/guides/inferencepool-rollout.md b/site-src/guides/inferencepool-rollout.md index 80a6e3bf6..98b3cc4cc 100644 --- a/site-src/guides/inferencepool-rollout.md +++ b/site-src/guides/inferencepool-rollout.md @@ -208,7 +208,8 @@ kind: InferencePool metadata: name: vllm-llama3-8b-instruct-new spec: - targetPortNumber: 8000 + targetPorts: + - number: 8000 selector: app: vllm-llama3-8b-instruct-new extensionRef: diff --git a/test/testdata/inferencepool-e2e.yaml b/test/testdata/inferencepool-e2e.yaml index aec574a39..7c546ba37 100644 --- a/test/testdata/inferencepool-e2e.yaml +++ b/test/testdata/inferencepool-e2e.yaml @@ -4,7 +4,8 @@ metadata: labels: name: vllm-llama3-8b-instruct spec: - targetPortNumber: 8000 + targetPorts: + - number: 8000 selector: matchLabels: app: vllm-llama3-8b-instruct diff --git a/test/testdata/inferencepool-with-model-hermetic.yaml b/test/testdata/inferencepool-with-model-hermetic.yaml index 707a76eed..20fea8d1a 100644 --- a/test/testdata/inferencepool-with-model-hermetic.yaml +++ 
b/test/testdata/inferencepool-with-model-hermetic.yaml @@ -4,7 +4,8 @@ metadata: name: vllm-llama3-8b-instruct-pool namespace: default spec: - targetPortNumber: 8000 + targetPorts: + - number: 8000 selector: matchLabels: app: vllm-llama3-8b-instruct-pool diff --git a/test/utils/server.go b/test/utils/server.go index f76e147af..51eb33fa0 100644 --- a/test/utils/server.go +++ b/test/utils/server.go @@ -71,7 +71,7 @@ func PrepareForTestStreamingServer(objectives []*v1alpha2.InferenceObjective, po WithObjects(initObjs...). Build() pool := testutil.MakeInferencePool(poolName).Namespace(namespace).ObjRef() - pool.Spec.TargetPortNumber = poolPort + pool.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(poolPort)}} _ = ds.PoolSet(context.Background(), fakeClient, pool) return ctx, cancel, ds, pmc