diff --git a/api/v1/inferencepool_types.go b/api/v1/inferencepool_types.go
index b1ce91e56..103d536a9 100644
--- a/api/v1/inferencepool_types.go
+++ b/api/v1/inferencepool_types.go
@@ -66,19 +66,29 @@ type InferencePoolSpec struct {
 	// +required
 	Selector LabelSelector `json:"selector,omitempty,omitzero"`
 
-	// TargetPortNumber defines the port number to access the selected model server Pods.
-	// The number must be in the range 1 to 65535.
-	//
-	// +kubebuilder:validation:Minimum=1
-	// +kubebuilder:validation:Maximum=65535
+	// TargetPorts defines a list of ports that are exposed by this InferencePool.
+	// Currently, the list may only include a single port definition.
+	// +kubebuilder:validation:MinItems=1
+	// +kubebuilder:validation:MaxItems=1
+	// +listType=map
+	// +listMapKey=number
 	// +required
-	TargetPortNumber int32 `json:"targetPortNumber,omitempty"`
+	TargetPorts []Port `json:"targetPorts,omitempty"`
 
 	// Extension configures an endpoint picker as an extension service.
-	// +optional
+	// +required
 	ExtensionRef Extension `json:"extensionRef,omitempty,omitzero"`
 }
 
+// Port defines the network port that will be exposed by this InferencePool.
+type Port struct {
+	// Number defines the port number to access the selected model server Pods.
+	// The number must be in the range 1 to 65535.
+	//
+	// +required
+	Number PortNumber `json:"number,omitempty"`
+}
+
 // Extension specifies how to configure an extension that runs the endpoint picker.
 type Extension struct {
 	// Group is the group of the referent.
diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go
index 11fa77fdb..0cdf1700a 100644
--- a/api/v1/zz_generated.deepcopy.go
+++ b/api/v1/zz_generated.deepcopy.go
@@ -108,6 +108,11 @@ func (in *InferencePoolList) DeepCopyObject() runtime.Object {
 func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) {
 	*out = *in
 	in.Selector.DeepCopyInto(&out.Selector)
+	if in.TargetPorts != nil {
+		in, out := &in.TargetPorts, &out.TargetPorts
+		*out = make([]Port, len(*in))
+		copy(*out, *in)
+	}
 	in.ExtensionRef.DeepCopyInto(&out.ExtensionRef)
 }
 
@@ -207,3 +212,18 @@ func (in *PoolStatus) DeepCopy() *PoolStatus {
 	in.DeepCopyInto(out)
 	return out
 }
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *Port) DeepCopyInto(out *Port) {
+	*out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Port.
+func (in *Port) DeepCopy() *Port {
+	if in == nil {
+		return nil
+	}
+	out := new(Port)
+	in.DeepCopyInto(out)
+	return out
+}
diff --git a/apix/v1alpha2/inferencepool_conversion.go b/apix/v1alpha2/inferencepool_conversion.go
index 8c02093cc..aa29d8f69 100644
--- a/apix/v1alpha2/inferencepool_conversion.go
+++ b/apix/v1alpha2/inferencepool_conversion.go
@@ -31,7 +31,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error {
 	if dst == nil {
 		return errors.New("dst cannot be nil")
 	}
-	v1Extension, err := convertExtensionRefToV1(src.Spec.ExtensionRef)
+	v1Extension, err := convertExtensionRefToV1(&src.Spec.ExtensionRef)
 	if err != nil {
 		return err
 	}
@@ -41,7 +41,7 @@ func (src *InferencePool) ConvertTo(dst *v1.InferencePool) error {
 	}
 	dst.TypeMeta = src.TypeMeta
 	dst.ObjectMeta = src.ObjectMeta
-	dst.Spec.TargetPortNumber = src.Spec.TargetPortNumber
+	dst.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(src.Spec.TargetPortNumber)}}
 	dst.Spec.ExtensionRef = v1Extension
 	dst.Status = *v1Status
 	if src.Spec.Selector != nil {
@@ -68,7 +68,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error {
 	}
 	dst.TypeMeta = src.TypeMeta
 	dst.ObjectMeta = src.ObjectMeta
-	dst.Spec.TargetPortNumber = src.Spec.TargetPortNumber
+	dst.Spec.TargetPortNumber = int32(src.Spec.TargetPorts[0].Number)
 	dst.Spec.ExtensionRef = extensionRef
 	dst.Status = *status
 	if src.Spec.Selector.MatchLabels != nil {
@@ -82,7 +82,7 @@ func (dst *InferencePool) ConvertFrom(src *v1.InferencePool) error {
 
 func convertStatusToV1(src *InferencePoolStatus) (*v1.InferencePoolStatus, error) {
 	if src == nil {
-		return nil, nil
+		return nil, errors.New("src cannot be nil")
 	}
 	u, err := toUnstructured(src)
 	if err != nil {
@@ -93,7 +93,7 @@ func convertStatusToV1(src *InferencePoolStatus) (*v1.InferencePoolStatus, error
 
 func convertStatusFromV1(src *v1.InferencePoolStatus) (*InferencePoolStatus, error) {
 	if src == nil {
-		return nil, nil
+		return nil, errors.New("src cannot be nil")
 	}
 	u, err := toUnstructured(src)
 	if err != nil {
@@ -104,7 +104,7 @@ func convertStatusFromV1(src *v1.InferencePoolStatus) (*InferencePoolStatus, err
 
 func convertExtensionRefToV1(src *Extension) (v1.Extension, error) {
 	if src == nil {
-		return v1.Extension{}, nil
+		return v1.Extension{}, errors.New("src cannot be nil")
 	}
 	u, err := toUnstructured(src)
 	if err != nil {
@@ -117,19 +117,21 @@ func convertExtensionRefToV1(src *Extension) (v1.Extension, error) {
 	return *out, nil
 }
 
-func convertExtensionRefFromV1(src *v1.Extension) (*Extension, error) {
+// convertExtensionRefFromV1 converts a v1 Extension into its v1alpha2 form by
+// round-tripping it through an unstructured object. A nil src is an error.
+func convertExtensionRefFromV1(src *v1.Extension) (Extension, error) {
 	if src == nil {
-		return nil, nil
+		return Extension{}, errors.New("src cannot be nil")
 	}
 	u, err := toUnstructured(src)
 	if err != nil {
-		return nil, err
+		return Extension{}, err
 	}
 	extension, err := convert[Extension](u)
 	if err != nil {
-		return nil, err
+		return Extension{}, err
 	}
-	return extension, nil
+	return *extension, nil
 }
 
 func toUnstructured(obj any) (*unstructured.Unstructured, error) {
diff --git a/apix/v1alpha2/inferencepool_conversion_test.go b/apix/v1alpha2/inferencepool_conversion_test.go
index 5c79861c4..847372b53 100644
--- a/apix/v1alpha2/inferencepool_conversion_test.go
+++ b/apix/v1alpha2/inferencepool_conversion_test.go
@@ -60,7 +60,7 @@ func TestInferencePoolConvertTo(t *testing.T) {
 						"app": "my-model-server",
 					},
 					TargetPortNumber: 8080,
-					ExtensionRef: &Extension{
+					ExtensionRef: Extension{
 						Group:       &group,
 						Kind:        &kind,
 						Name:        "my-epp-service",
@@ -99,7 +99,7 @@ func TestInferencePoolConvertTo(t *testing.T) {
 							"app": "my-model-server",
 						},
 					},
-					TargetPortNumber: 8080,
+					TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}},
 					ExtensionRef: v1.Extension{
 						Group:       &v1Group,
 						Kind:        v1Kind,
@@ -127,7 +127,7 @@ func TestInferencePoolConvertTo(t *testing.T) {
 			wantErr: false,
 		},
 		{
-			name: "conversion from v1alpha2 to v1 with nil extensionRef",
+			name: "conversion from v1alpha2 to v1 with empty extensionRef",
 			src: &InferencePool{
 				TypeMeta: metav1.TypeMeta{
 					Kind:       "InferencePool",
@@ -174,7 +174,7 @@ func TestInferencePoolConvertTo(t *testing.T) {
 							"app": "my-model-server",
 						},
 					},
-					TargetPortNumber: 8080,
+					TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}},
 				},
 				Status: v1.InferencePoolStatus{
 					Parents: []v1.PoolStatus{
@@ -234,7 +234,7 @@ func TestInferencePoolConvertFrom(t *testing.T) {
 							"app": "my-model-server",
 						},
 					},
-					TargetPortNumber: 8080,
+					TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}},
 					ExtensionRef: v1.Extension{
 						Group:       &v1Group,
 						Kind:        v1Kind,
@@ -273,7 +273,7 @@ func TestInferencePoolConvertFrom(t *testing.T) {
 						"app": "my-model-server",
 					},
 					TargetPortNumber: 8080,
-					ExtensionRef: &Extension{
+					ExtensionRef: Extension{
 						Group:       &group,
 						Kind:        &kind,
 						Name:        "my-epp-service",
@@ -300,7 +300,7 @@ func TestInferencePoolConvertFrom(t *testing.T) {
 			wantErr: false,
 		},
 		{
-			name: "conversion from v1 to v1alpha2 with nil extensionRef",
+			name: "conversion from v1 to v1alpha2 with empty extensionRef",
 			src: &v1.InferencePool{
 				TypeMeta: metav1.TypeMeta{
 					Kind:       "InferencePool",
@@ -316,7 +316,7 @@ func TestInferencePoolConvertFrom(t *testing.T) {
 							"app": "my-model-server",
 						},
 					},
-					TargetPortNumber: 8080,
+					TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8080))}},
 				},
 				Status: v1.InferencePoolStatus{
 					Parents: []v1.PoolStatus{
@@ -348,7 +348,6 @@ func TestInferencePoolConvertFrom(t *testing.T) {
 						"app": "my-model-server",
 					},
 					TargetPortNumber: 8080,
-					ExtensionRef:     &Extension{},
 				},
 				Status: InferencePoolStatus{
 					Parents: []PoolStatus{
diff --git a/apix/v1alpha2/inferencepool_types.go b/apix/v1alpha2/inferencepool_types.go
index 515aa0080..0fd879f78 100644
--- a/apix/v1alpha2/inferencepool_types.go
+++ b/apix/v1alpha2/inferencepool_types.go
@@ -70,7 +70,8 @@ type InferencePoolSpec struct {
 	TargetPortNumber int32 `json:"targetPortNumber"`
 
 	// Extension configures an endpoint picker as an extension service.
-	ExtensionRef *Extension `json:"extensionRef,omitempty"`
+	// +required
+	ExtensionRef Extension `json:"extensionRef,omitempty"`
 }
 
 // Extension specifies how to configure an extension that runs the endpoint picker.
diff --git a/apix/v1alpha2/zz_generated.deepcopy.go b/apix/v1alpha2/zz_generated.deepcopy.go
index f8eb08185..5a71e7530 100644
--- a/apix/v1alpha2/zz_generated.deepcopy.go
+++ b/apix/v1alpha2/zz_generated.deepcopy.go
@@ -231,11 +231,7 @@ func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec) {
 			(*out)[key] = val
 		}
 	}
-	if in.ExtensionRef != nil {
-		in, out := &in.ExtensionRef, &out.ExtensionRef
-		*out = new(Extension)
-		(*in).DeepCopyInto(*out)
-	}
+	in.ExtensionRef.DeepCopyInto(&out.ExtensionRef)
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec.
diff --git a/client-go/applyconfiguration/api/v1/inferencepoolspec.go b/client-go/applyconfiguration/api/v1/inferencepoolspec.go
index 8d14a3969..364d059dd 100644
--- a/client-go/applyconfiguration/api/v1/inferencepoolspec.go
+++ b/client-go/applyconfiguration/api/v1/inferencepoolspec.go
@@ -21,9 +21,9 @@ package v1
 // InferencePoolSpecApplyConfiguration represents a declarative configuration of the InferencePoolSpec type for use
 // with apply.
 type InferencePoolSpecApplyConfiguration struct {
-	Selector         *LabelSelectorApplyConfiguration `json:"selector,omitempty"`
-	TargetPortNumber *int32                           `json:"targetPortNumber,omitempty"`
-	ExtensionRef     *ExtensionApplyConfiguration     `json:"extensionRef,omitempty"`
+	Selector     *LabelSelectorApplyConfiguration `json:"selector,omitempty"`
+	TargetPorts  []PortApplyConfiguration         `json:"targetPorts,omitempty"`
+	ExtensionRef *ExtensionApplyConfiguration     `json:"extensionRef,omitempty"`
 }
 
 // InferencePoolSpecApplyConfiguration constructs a declarative configuration of the InferencePoolSpec type for use with
@@ -40,11 +40,16 @@ func (b *InferencePoolSpecApplyConfiguration) WithSelector(value *LabelSelectorA
 	return b
 }
 
-// WithTargetPortNumber sets the TargetPortNumber field in the declarative configuration to the given value
-// and returns the receiver, so that objects can be built by chaining "With" function invocations.
-// If called multiple times, the TargetPortNumber field is set to the value of the last call.
-func (b *InferencePoolSpecApplyConfiguration) WithTargetPortNumber(value int32) *InferencePoolSpecApplyConfiguration {
-	b.TargetPortNumber = &value
+// WithTargetPorts adds the given value to the TargetPorts field in the declarative configuration
+// and returns the receiver, so that objects can be build by chaining "With" function invocations.
+// If called multiple times, values provided by each call will be appended to the TargetPorts field.
+func (b *InferencePoolSpecApplyConfiguration) WithTargetPorts(values ...*PortApplyConfiguration) *InferencePoolSpecApplyConfiguration {
+	for i := range values {
+		if values[i] == nil {
+			panic("nil value passed to WithTargetPorts")
+		}
+		b.TargetPorts = append(b.TargetPorts, *values[i])
+	}
 	return b
 }
 
diff --git a/client-go/applyconfiguration/api/v1/port.go b/client-go/applyconfiguration/api/v1/port.go
new file mode 100644
index 000000000..6067a5d38
--- /dev/null
+++ b/client-go/applyconfiguration/api/v1/port.go
@@ -0,0 +1,43 @@
+/*
+Copyright The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Code generated by applyconfiguration-gen. DO NOT EDIT.
+
+package v1
+
+import (
+	apiv1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
+)
+
+// PortApplyConfiguration represents a declarative configuration of the Port type for use
+// with apply.
+type PortApplyConfiguration struct {
+	Number *apiv1.PortNumber `json:"number,omitempty"`
+}
+
+// PortApplyConfiguration constructs a declarative configuration of the Port type for use with
+// apply.
+func Port() *PortApplyConfiguration {
+	return &PortApplyConfiguration{}
+}
+
+// WithNumber sets the Number field in the declarative configuration to the given value
+// and returns the receiver, so that objects can be built by chaining "With" function invocations.
+// If called multiple times, the Number field is set to the value of the last call.
+func (b *PortApplyConfiguration) WithNumber(value apiv1.PortNumber) *PortApplyConfiguration {
+	b.Number = &value
+	return b
+}
diff --git a/client-go/applyconfiguration/utils.go b/client-go/applyconfiguration/utils.go
index f208bf1d9..c5d4f575e 100644
--- a/client-go/applyconfiguration/utils.go
+++ b/client-go/applyconfiguration/utils.go
@@ -48,6 +48,8 @@ func ForKind(kind schema.GroupVersionKind) interface{} {
 		return &apiv1.ParentGatewayReferenceApplyConfiguration{}
 	case v1.SchemeGroupVersion.WithKind("PoolStatus"):
 		return &apiv1.PoolStatusApplyConfiguration{}
+	case v1.SchemeGroupVersion.WithKind("Port"):
+		return &apiv1.PortApplyConfiguration{}
 
 		// Group=inference.networking.x-k8s.io, Version=v1alpha2
 	case v1alpha2.SchemeGroupVersion.WithKind("Extension"):
diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go
index c614b2146..6b14d53be 100644
--- a/cmd/epp/runner/runner.go
+++ b/cmd/epp/runner/runner.go
@@ -148,7 +148,7 @@ var (
 		"The configuration specified as text, in lieu of a file")
 
 	modelServerMetricsPort = flag.Int("model-server-metrics-port", 0, "Port to scrape metrics from pods. "+
-		"Default value will be set to InferencePool.Spec.TargetPortNumber if not set.")
+		"Default value will be set to the InferencePool.Spec.TargetPorts[0].Number if not set.")
 	modelServerMetricsPath                    = flag.String("model-server-metrics-path", "/metrics", "Path to scrape metrics from pods")
 	modelServerMetricsScheme                  = flag.String("model-server-metrics-scheme", "http", "Scheme to scrape metrics from pods")
 	modelServerMetricsHttpsInsecureSkipVerify = flag.Bool("model-server-metrics-https-insecure-skip-verify", true, "When using 'https' scheme for 'model-server-metrics-scheme', configure 'InsecureSkipVerify' (default to true)")
diff --git a/config/charts/inferencepool/templates/inferencepool.yaml b/config/charts/inferencepool/templates/inferencepool.yaml
index ecddbfaa7..ad13f2d43 100644
--- a/config/charts/inferencepool/templates/inferencepool.yaml
+++ b/config/charts/inferencepool/templates/inferencepool.yaml
@@ -7,7 +7,10 @@ metadata:
   labels:
     {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
 spec:
-  targetPortNumber: {{ .Values.inferencePool.targetPortNumber }}
+  targetPorts:
+    {{- range .Values.inferencePool.targetPorts }}
+      - number: {{ .number }}
+    {{- end }}
   selector:
     matchLabels:
       {{- if .Values.inferencePool.modelServers.matchLabels }}
diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml
index 95eb07731..5f0c2fd04 100644
--- a/config/charts/inferencepool/values.yaml
+++ b/config/charts/inferencepool/values.yaml
@@ -39,11 +39,12 @@ inferenceExtension:
   enableLeaderElection: false
 
 inferencePool:
-  targetPortNumber: 8000
+  targetPorts:
+    - number: 8000
   modelServerType: vllm # vllm, triton-tensorrt-llm
-  # modelServers: # REQUIRED
-    # matchLabels: 
-    #   app: vllm-llama3-8b-instruct
+  modelServers: # REQUIRED
+    matchLabels:
+      app: vllm-llama3-8b-instruct
 
 provider:
   name: none
diff --git a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml
index 99fd2a97a..f4bc83dca 100644
--- a/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml
+++ b/config/crd/bases/inference.networking.k8s.io_inferencepools.yaml
@@ -132,17 +132,35 @@ spec:
                 required:
                 - matchLabels
                 type: object
-              targetPortNumber:
+              targetPorts:
                 description: |-
-                  TargetPortNumber defines the port number to access the selected model server Pods.
-                  The number must be in the range 1 to 65535.
-                format: int32
-                maximum: 65535
-                minimum: 1
-                type: integer
+                  TargetPorts defines a list of ports that are exposed by this InferencePool.
+                  Currently, the list may only include a single port definition.
+                items:
+                  description: Port defines the network port that will be exposed
+                    by this InferencePool.
+                  properties:
+                    number:
+                      description: |-
+                        Number defines the port number to access the selected model server Pods.
+                        The number must be in the range 1 to 65535.
+                      format: int32
+                      maximum: 65535
+                      minimum: 1
+                      type: integer
+                  required:
+                  - number
+                  type: object
+                maxItems: 1
+                minItems: 1
+                type: array
+                x-kubernetes-list-map-keys:
+                - number
+                x-kubernetes-list-type: map
             required:
+            - extensionRef
             - selector
-            - targetPortNumber
+            - targetPorts
             type: object
           status:
             default:
diff --git a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
index b40b1556e..138734e7e 100644
--- a/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
+++ b/config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml
@@ -130,6 +130,7 @@ spec:
                 minimum: 1
                 type: integer
             required:
+            - extensionRef
             - selector
             - targetPortNumber
             type: object
diff --git a/config/manifests/inferencepool-resources.yaml b/config/manifests/inferencepool-resources.yaml
index 33a7b8e74..a3eba5582 100644
--- a/config/manifests/inferencepool-resources.yaml
+++ b/config/manifests/inferencepool-resources.yaml
@@ -8,7 +8,8 @@ kind: InferencePool
 metadata:
   name: vllm-llama3-8b-instruct
 spec:
-  targetPortNumber: 8000
+  targetPorts:
+    - number: 8000
   selector:
     matchLabels:
       app: vllm-llama3-8b-instruct
diff --git a/conformance/resources/base.yaml b/conformance/resources/base.yaml
index e3d7561bb..5e00a6971 100644
--- a/conformance/resources/base.yaml
+++ b/conformance/resources/base.yaml
@@ -155,7 +155,8 @@ spec:
   selector:
     matchLabels:
       app: primary-inference-model-server
-  targetPortNumber: 3000
+  targetPorts:
+    - number: 3000
   extensionRef:
     name: primary-endpoint-picker-svc
 ---
@@ -248,8 +249,9 @@ metadata:
 spec:
   selector:
     matchLabels:
-      app: secondary-inference-model-server
-  targetPortNumber: 3000
+      app: primary-inference-model-server
+  targetPorts:
+    - number: 3000
   extensionRef:
     name: secondary-endpoint-picker-svc
     failureMode: FailOpen
diff --git a/pkg/epp/backend/metrics/pod_metrics.go b/pkg/epp/backend/metrics/pod_metrics.go
index eaa77d5c9..da66a97ed 100644
--- a/pkg/epp/backend/metrics/pod_metrics.go
+++ b/pkg/epp/backend/metrics/pod_metrics.go
@@ -117,7 +117,10 @@ func (pm *podMetrics) refreshMetrics() error {
 	}
 	ctx, cancel := context.WithTimeout(context.Background(), fetchMetricsTimeout)
 	defer cancel()
-	updated, err := pm.pmc.FetchMetrics(ctx, pm.GetPod(), pm.GetMetrics(), pool.Spec.TargetPortNumber)
+	if len(pool.Spec.TargetPorts) != 1 {
+		return fmt.Errorf("expected 1 target port, got %d", len(pool.Spec.TargetPorts))
+	}
+	updated, err := pm.pmc.FetchMetrics(ctx, pm.GetPod(), pm.GetMetrics(), int32(pool.Spec.TargetPorts[0].Number))
 	if err != nil {
 		pm.logger.V(logutil.TRACE).Info("Failed to refreshed metrics:", "err", err)
 	}
diff --git a/pkg/epp/backend/metrics/pod_metrics_test.go b/pkg/epp/backend/metrics/pod_metrics_test.go
index d3d8e2b45..9a0e1a6fc 100644
--- a/pkg/epp/backend/metrics/pod_metrics_test.go
+++ b/pkg/epp/backend/metrics/pod_metrics_test.go
@@ -88,7 +88,7 @@ func TestMetricsRefresh(t *testing.T) {
 type fakeDataStore struct{}
 
 func (f *fakeDataStore) PoolGet() (*v1.InferencePool, error) {
-	return &v1.InferencePool{Spec: v1.InferencePoolSpec{TargetPortNumber: 8000}}, nil
+	return &v1.InferencePool{Spec: v1.InferencePoolSpec{TargetPorts: []v1.Port{{Number: 8000}}}}, nil
 }
 
 func (f *fakeDataStore) PodList(func(PodMetrics) bool) []PodMetrics {
diff --git a/pkg/epp/controller/inferencepool_reconciler_test.go b/pkg/epp/controller/inferencepool_reconciler_test.go
index 8a6fcc4e2..48d508389 100644
--- a/pkg/epp/controller/inferencepool_reconciler_test.go
+++ b/pkg/epp/controller/inferencepool_reconciler_test.go
@@ -80,8 +80,8 @@ func TestInferencePoolReconciler(t *testing.T) {
 	pool1 := utiltest.MakeInferencePool("pool1").
 		Namespace("pool1-ns").
 		Selector(selector_v1).
-		ExtensionRef("epp-service").
-		TargetPortNumber(8080).ObjRef()
+		TargetPorts(8080).
+		ExtensionRef("epp-service").ObjRef()
 	pool1.SetGroupVersionKind(gvk)
 	pool2 := utiltest.MakeInferencePool("pool2").Namespace("pool2-ns").ExtensionRef("epp-service").ObjRef()
 	pool2.SetGroupVersionKind(gvk)
@@ -146,7 +146,7 @@ func TestInferencePoolReconciler(t *testing.T) {
 	if err := fakeClient.Get(ctx, req.NamespacedName, newPool1); err != nil {
 		t.Errorf("Unexpected pool get error: %v", err)
 	}
-	newPool1.Spec.TargetPortNumber = 9090
+	newPool1.Spec.TargetPorts = []v1.Port{{Number: 9090}}
 	if err := fakeClient.Update(ctx, newPool1, &client.UpdateOptions{}); err != nil {
 		t.Errorf("Unexpected pool update error: %v", err)
 	}
@@ -219,12 +219,15 @@ func TestXInferencePoolReconciler(t *testing.T) {
 		Version: v1alpha2.GroupVersion.Version,
 		Kind:    "InferencePool",
 	}
-	pool1 := utiltest.MakeXInferencePool("pool1").
+	pool1 := utiltest.MakeAlphaInferencePool("pool1").
 		Namespace("pool1-ns").
 		Selector(selector_v1).
 		ExtensionRef("epp-service").
 		TargetPortNumber(8080).ObjRef()
-	pool2 := utiltest.MakeXInferencePool("pool2").Namespace("pool2-ns").ExtensionRef("epp-service").ObjRef()
+	pool2 := utiltest.MakeAlphaInferencePool("pool2").
+		Namespace("pool2-ns").
+		ExtensionRef("epp-service").
+		TargetPortNumber(8080).ObjRef()
 	pool1.SetGroupVersionKind(gvk)
 	pool2.SetGroupVersionKind(gvk)
 
@@ -323,6 +326,7 @@ func xDiffStore(t *testing.T, datastore datastore.Datastore, params xDiffStorePa
 	if gotPool == nil && params.wantPool == nil {
 		return ""
 	}
+
 	gotXPool := &v1alpha2.InferencePool{}
 
 	err := gotXPool.ConvertFrom(gotPool)
diff --git a/pkg/epp/controller/pod_reconciler_test.go b/pkg/epp/controller/pod_reconciler_test.go
index d1dae0f05..5ceb3efdb 100644
--- a/pkg/epp/controller/pod_reconciler_test.go
+++ b/pkg/epp/controller/pod_reconciler_test.go
@@ -61,7 +61,7 @@ func TestPodReconciler(t *testing.T) {
 			existingPods: []*corev1.Pod{basePod1, basePod2},
 			pool: &v1.InferencePool{
 				Spec: v1.InferencePoolSpec{
-					TargetPortNumber: int32(8000),
+					TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}},
 					Selector: v1.LabelSelector{
 						MatchLabels: map[v1.LabelKey]v1.LabelValue{
 							"some-key": "some-val",
@@ -79,7 +79,7 @@ func TestPodReconciler(t *testing.T) {
 			existingPods: []*corev1.Pod{basePod1, basePod2},
 			pool: &v1.InferencePool{
 				Spec: v1.InferencePoolSpec{
-					TargetPortNumber: int32(8000),
+					TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}},
 					Selector: v1.LabelSelector{
 						MatchLabels: map[v1.LabelKey]v1.LabelValue{
 							"some-key": "some-val",
@@ -97,7 +97,7 @@ func TestPodReconciler(t *testing.T) {
 			existingPods: []*corev1.Pod{basePod1, basePod2},
 			pool: &v1.InferencePool{
 				Spec: v1.InferencePoolSpec{
-					TargetPortNumber: int32(8000),
+					TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}},
 					Selector: v1.LabelSelector{
 						MatchLabels: map[v1.LabelKey]v1.LabelValue{
 							"some-key": "some-val",
@@ -116,7 +116,7 @@ func TestPodReconciler(t *testing.T) {
 			existingPods: []*corev1.Pod{basePod1, basePod2},
 			pool: &v1.InferencePool{
 				Spec: v1.InferencePoolSpec{
-					TargetPortNumber: int32(8000),
+					TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}},
 					Selector: v1.LabelSelector{
 						MatchLabels: map[v1.LabelKey]v1.LabelValue{
 							"some-key": "some-val",
@@ -132,7 +132,7 @@ func TestPodReconciler(t *testing.T) {
 			existingPods: []*corev1.Pod{basePod1, basePod2},
 			pool: &v1.InferencePool{
 				Spec: v1.InferencePoolSpec{
-					TargetPortNumber: int32(8000),
+					TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}},
 					Selector: v1.LabelSelector{
 						MatchLabels: map[v1.LabelKey]v1.LabelValue{
 							"some-key": "some-val",
@@ -149,7 +149,7 @@ func TestPodReconciler(t *testing.T) {
 			existingPods: []*corev1.Pod{basePod1, basePod2},
 			pool: &v1.InferencePool{
 				Spec: v1.InferencePoolSpec{
-					TargetPortNumber: int32(8000),
+					TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}},
 					Selector: v1.LabelSelector{
 						MatchLabels: map[v1.LabelKey]v1.LabelValue{
 							"some-key": "some-val",
@@ -167,7 +167,7 @@ func TestPodReconciler(t *testing.T) {
 			existingPods: []*corev1.Pod{basePod1, basePod2},
 			pool: &v1.InferencePool{
 				Spec: v1.InferencePoolSpec{
-					TargetPortNumber: int32(8000),
+					TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}},
 					Selector: v1.LabelSelector{
 						MatchLabels: map[v1.LabelKey]v1.LabelValue{
 							"some-key": "some-val",
diff --git a/pkg/epp/datastore/datastore_test.go b/pkg/epp/datastore/datastore_test.go
index 77fe7c7c0..271c31ee7 100644
--- a/pkg/epp/datastore/datastore_test.go
+++ b/pkg/epp/datastore/datastore_test.go
@@ -245,7 +245,7 @@ var (
 	pod2NamespacedName = types.NamespacedName{Name: pod2.Name, Namespace: pod2.Namespace}
 	inferencePool      = &v1.InferencePool{
 		Spec: v1.InferencePoolSpec{
-			TargetPortNumber: 8000,
+			TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}},
 		},
 	}
 )
diff --git a/pkg/epp/handlers/request.go b/pkg/epp/handlers/request.go
index d051e163d..7f8122195 100644
--- a/pkg/epp/handlers/request.go
+++ b/pkg/epp/handlers/request.go
@@ -17,6 +17,7 @@ limitations under the License.
 package handlers
 
 import (
+	"fmt"
 	"strconv"
 	"time"
 
@@ -45,7 +46,10 @@ func (s *StreamingServer) HandleRequestHeaders(reqCtx *RequestContext, req *extP
 		if err != nil {
 			return err
 		}
-		reqCtx.TargetEndpoint = pod.Address + ":" + strconv.Itoa(int(pool.Spec.TargetPortNumber))
+		if len(pool.Spec.TargetPorts) != 1 {
+			return fmt.Errorf("expected 1 target port, got %d", len(pool.Spec.TargetPorts))
+		}
+		reqCtx.TargetEndpoint = pod.Address + ":" + strconv.Itoa(int(pool.Spec.TargetPorts[0].Number))
 		reqCtx.RequestSize = 0
 		reqCtx.reqHeaderResp = s.generateRequestHeaderResponse(reqCtx)
 		return nil
diff --git a/pkg/epp/metrics/collectors/inference_pool_test.go b/pkg/epp/metrics/collectors/inference_pool_test.go
index e855c55c1..dcac3b37d 100644
--- a/pkg/epp/metrics/collectors/inference_pool_test.go
+++ b/pkg/epp/metrics/collectors/inference_pool_test.go
@@ -80,7 +80,7 @@ func TestMetricsCollected(t *testing.T) {
 			Name: "test-pool",
 		},
 		Spec: v1.InferencePoolSpec{
-			TargetPortNumber: 8000,
+			TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}},
 		},
 	}
 	_ = ds.PoolSet(context.Background(), fakeClient, inferencePool)
diff --git a/pkg/epp/requestcontrol/director.go b/pkg/epp/requestcontrol/director.go
index c40b5864d..1435c0154 100644
--- a/pkg/epp/requestcontrol/director.go
+++ b/pkg/epp/requestcontrol/director.go
@@ -240,7 +240,10 @@ func (d *Director) prepareRequest(ctx context.Context, reqCtx *handlers.RequestC
 		return reqCtx, err
 	}
 	targetPods := []*backend.Pod{}
-	targetPort := int(pool.Spec.TargetPortNumber)
+	if len(pool.Spec.TargetPorts) != 1 {
+		return reqCtx, errutil.Error{Code: errutil.BadRequest, Msg: "targetPorts should have length 1"}
+	}
+	targetPort := int(pool.Spec.TargetPorts[0].Number)
 	targetEndpoints := []string{}
 
 	for _, pod := range result.ProfileResults[result.PrimaryProfileName].TargetPods {
diff --git a/pkg/epp/requestcontrol/director_test.go b/pkg/epp/requestcontrol/director_test.go
index c25b9640d..e7968d4d9 100644
--- a/pkg/epp/requestcontrol/director_test.go
+++ b/pkg/epp/requestcontrol/director_test.go
@@ -101,7 +101,7 @@ func TestDirector_HandleRequest(t *testing.T) {
 	pool := &v1.InferencePool{
 		ObjectMeta: metav1.ObjectMeta{Name: "test-pool", Namespace: "default"},
 		Spec: v1.InferencePoolSpec{
-			TargetPortNumber: int32(8000),
+			TargetPorts: []v1.Port{{Number: v1.PortNumber(int32(8000))}},
 			Selector: v1.LabelSelector{
 				MatchLabels: map[v1.LabelKey]v1.LabelValue{
 					"app": "inference",
diff --git a/pkg/epp/util/testing/wrappers.go b/pkg/epp/util/testing/wrappers.go
index c637713f8..9eeb3ad9d 100644
--- a/pkg/epp/util/testing/wrappers.go
+++ b/pkg/epp/util/testing/wrappers.go
@@ -200,8 +200,8 @@ func (m *InferencePoolWrapper) Selector(selector map[string]string) *InferencePo
 	return m
 }
 
-func (m *InferencePoolWrapper) TargetPortNumber(p int32) *InferencePoolWrapper {
-	m.Spec.TargetPortNumber = p
+func (m *InferencePoolWrapper) TargetPorts(p int32) *InferencePoolWrapper {
+	m.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(p)}}
 	return m
 }
 
@@ -215,14 +215,14 @@ func (m *InferencePoolWrapper) ObjRef() *v1.InferencePool {
 	return &m.InferencePool
 }
 
-// XInferencePoolWrapper wraps an group "inference.networking.x-k8s.io" InferencePool.
-type XInferencePoolWrapper struct {
+// AlphaInferencePoolWrapper wraps a group "inference.networking.x-k8s.io" InferencePool.
+type AlphaInferencePoolWrapper struct {
 	v1alpha2.InferencePool
 }
 
-// MakeXInferencePool creates a wrapper for a InferencePool.
-func MakeXInferencePool(name string) *XInferencePoolWrapper {
-	return &XInferencePoolWrapper{
+// MakeAlphaInferencePool creates a wrapper for an InferencePool.
+func MakeAlphaInferencePool(name string) *AlphaInferencePoolWrapper {
+	return &AlphaInferencePoolWrapper{
 		v1alpha2.InferencePool{
 			ObjectMeta: metav1.ObjectMeta{
 				Name: name,
@@ -232,12 +232,12 @@ func MakeXInferencePool(name string) *XInferencePoolWrapper {
 	}
 }
 
-func (m *XInferencePoolWrapper) Namespace(ns string) *XInferencePoolWrapper {
+func (m *AlphaInferencePoolWrapper) Namespace(ns string) *AlphaInferencePoolWrapper {
 	m.ObjectMeta.Namespace = ns
 	return m
 }
 
-func (m *XInferencePoolWrapper) Selector(selector map[string]string) *XInferencePoolWrapper {
+func (m *AlphaInferencePoolWrapper) Selector(selector map[string]string) *AlphaInferencePoolWrapper {
 	s := make(map[v1alpha2.LabelKey]v1alpha2.LabelValue)
 	for k, v := range selector {
 		s[v1alpha2.LabelKey(k)] = v1alpha2.LabelValue(v)
@@ -246,17 +246,17 @@ func (m *XInferencePoolWrapper) Selector(selector map[string]string) *XInference
 	return m
 }
 
-func (m *XInferencePoolWrapper) TargetPortNumber(p int32) *XInferencePoolWrapper {
+func (m *AlphaInferencePoolWrapper) TargetPortNumber(p int32) *AlphaInferencePoolWrapper {
 	m.Spec.TargetPortNumber = p
 	return m
 }
 
-func (m *XInferencePoolWrapper) ExtensionRef(name string) *XInferencePoolWrapper {
-	m.Spec.ExtensionRef = &v1alpha2.Extension{Name: v1alpha2.ObjectName(name)}
+func (m *AlphaInferencePoolWrapper) ExtensionRef(name string) *AlphaInferencePoolWrapper {
+	m.Spec.ExtensionRef = v1alpha2.Extension{Name: v1alpha2.ObjectName(name)}
 	return m
 }
 
 // Obj returns the wrapped InferencePool.
-func (m *XInferencePoolWrapper) ObjRef() *v1alpha2.InferencePool {
+func (m *AlphaInferencePoolWrapper) ObjRef() *v1alpha2.InferencePool {
 	return &m.InferencePool
 }
diff --git a/site-src/api-types/inferencepool.md b/site-src/api-types/inferencepool.md
index d2794478b..c4481b1ad 100644
--- a/site-src/api-types/inferencepool.md
+++ b/site-src/api-types/inferencepool.md
@@ -33,7 +33,8 @@ kind: InferencePool
 metadata:
   name: vllm-llama3-8b-instruct
 spec:
-  targetPortNumber: 8000
+  targetPorts:
+    - number: 8000
   selector:
     app: vllm-llama3-8b-instruct
   extensionRef:
diff --git a/site-src/guides/implementers.md b/site-src/guides/implementers.md
index 747e934a2..6fce01657 100644
--- a/site-src/guides/implementers.md
+++ b/site-src/guides/implementers.md
@@ -47,7 +47,8 @@ kind: InferencePool
 metadata:
   name: vllm-llama3-8b-instruct
 spec:
-  targetPortNumber: 8000
+  targetPorts:
+    - number: 8000
   selector:
     app: vllm-llama3-8b-instruct
   extensionRef:
diff --git a/site-src/guides/inferencepool-rollout.md b/site-src/guides/inferencepool-rollout.md
index 80a6e3bf6..98b3cc4cc 100644
--- a/site-src/guides/inferencepool-rollout.md
+++ b/site-src/guides/inferencepool-rollout.md
@@ -208,7 +208,8 @@ kind: InferencePool
 metadata:
   name: vllm-llama3-8b-instruct-new
 spec:
-  targetPortNumber: 8000
+  targetPorts:
+    - number: 8000
   selector:
     app: vllm-llama3-8b-instruct-new
   extensionRef:
diff --git a/test/testdata/inferencepool-e2e.yaml b/test/testdata/inferencepool-e2e.yaml
index aec574a39..7c546ba37 100644
--- a/test/testdata/inferencepool-e2e.yaml
+++ b/test/testdata/inferencepool-e2e.yaml
@@ -4,7 +4,8 @@ metadata:
   labels:
   name: vllm-llama3-8b-instruct
 spec:
-  targetPortNumber: 8000
+  targetPorts:
+    - number: 8000
   selector:
     matchLabels:
       app: vllm-llama3-8b-instruct
diff --git a/test/testdata/inferencepool-with-model-hermetic.yaml b/test/testdata/inferencepool-with-model-hermetic.yaml
index 707a76eed..20fea8d1a 100644
--- a/test/testdata/inferencepool-with-model-hermetic.yaml
+++ b/test/testdata/inferencepool-with-model-hermetic.yaml
@@ -4,7 +4,8 @@ metadata:
   name: vllm-llama3-8b-instruct-pool
   namespace: default
 spec:
-  targetPortNumber: 8000
+  targetPorts:
+    - number: 8000
   selector:
     matchLabels:
       app: vllm-llama3-8b-instruct-pool
diff --git a/test/utils/server.go b/test/utils/server.go
index f76e147af..51eb33fa0 100644
--- a/test/utils/server.go
+++ b/test/utils/server.go
@@ -71,7 +71,7 @@ func PrepareForTestStreamingServer(objectives []*v1alpha2.InferenceObjective, po
 		WithObjects(initObjs...).
 		Build()
 	pool := testutil.MakeInferencePool(poolName).Namespace(namespace).ObjRef()
-	pool.Spec.TargetPortNumber = poolPort
+	pool.Spec.TargetPorts = []v1.Port{{Number: v1.PortNumber(poolPort)}}
 	_ = ds.PoolSet(context.Background(), fakeClient, pool)
 
 	return ctx, cancel, ds, pmc