Skip to content
Closed
Show file tree
Hide file tree
Changes from 43 commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
f8c851c
changed epp and conformance test to support v1
capri-xiyue Jul 7, 2025
a9447be
fixed duplicate import
capri-xiyue Jul 7, 2025
234d87e
changed docker file
capri-xiyue Jul 7, 2025
b241c59
Merge branch 'capri-xiyue/v1-inference-pool' into capri-xiyue/v1-epp
capri-xiyue Jul 8, 2025
8269787
fixed integration test
capri-xiyue Jul 8, 2025
f1f87e9
fixed e2e test
capri-xiyue Jul 8, 2025
39f1a2e
change manifests
capri-xiyue Jul 8, 2025
c01cbc8
added rbac
capri-xiyue Jul 11, 2025
5b0a03e
Merge branch 'capri-xiyue/v1-inference-pool' into capri-xiyue/v1-epp
capri-xiyue Jul 11, 2025
16c4ee7
Merge branch 'capri-xiyue/v1-inference-pool' into capri-xiyue/v1-epp
capri-xiyue Jul 11, 2025
f04b884
Merge branch 'capri-xiyue/v1-inference-pool' into capri-xiyue/v1-epp
capri-xiyue Jul 11, 2025
dd01c6f
tried all alias
capri-xiyue Jul 11, 2025
38087dc
push local changes
capri-xiyue Jul 11, 2025
35c7a62
changed inferencepoollist not to use v1.InferencePoolList
capri-xiyue Jul 11, 2025
ac5adcc
changed comments
capri-xiyue Jul 11, 2025
c9837d0
fixed presubmit
capri-xiyue Jul 11, 2025
28b7e11
change to v1 ip
capri-xiyue Jul 11, 2025
c089347
revert to corev1 objectreference
capri-xiyue Jul 11, 2025
28657a7
revert back to corev1 objectreference
capri-xiyue Jul 11, 2025
7197a8e
change crd
capri-xiyue Jul 11, 2025
cf1d17f
remove crd
capri-xiyue Jul 11, 2025
256bdf1
manually add x-k8s.io inferencepool
capri-xiyue Jul 11, 2025
1435afd
revert crd
capri-xiyue Jul 12, 2025
d531bc6
Revert "added rbac"
capri-xiyue Jul 12, 2025
593d7b9
Merge branch 'capri-xiyue/v1-inference-pool' into capri-xiyue/v1-epp
capri-xiyue Jul 12, 2025
b387329
run generator
capri-xiyue Jul 12, 2025
96c13d9
Merge branch 'capri-xiyue/v1-inference-pool' into capri-xiyue/v1-epp
capri-xiyue Jul 12, 2025
4051ae2
revert to workable
capri-xiyue Jul 12, 2025
df1beb8
Merge branch 'capri-xiyue/v1-inference-pool' into capri-xiyue/v1-epp
capri-xiyue Jul 12, 2025
aba148d
fixed test
capri-xiyue Jul 12, 2025
bcad03c
test
capri-xiyue Jul 12, 2025
e610140
revert it back to original workable version
capri-xiyue Jul 12, 2025
130f98a
Merge branch 'capri-xiyue/v1-inference-pool' into capri-xiyue/v1-epp
capri-xiyue Jul 15, 2025
cd260b5
changed unused code
capri-xiyue Jul 15, 2025
47280ae
fixed pipeline:
capri-xiyue Jul 15, 2025
d8964cd
fixed missing dependency update
capri-xiyue Jul 15, 2025
c0e2832
fixed imports
capri-xiyue Jul 15, 2025
c877fab
Merge branch 'capri-xiyue/v1-inference-pool' into capri-xiyue/v1-epp
capri-xiyue Jul 15, 2025
7987066
merge to v1-inferencepool
capri-xiyue Jul 15, 2025
f67f493
fixed integration test
capri-xiyue Jul 15, 2025
04d0794
fixed import
capri-xiyue Jul 15, 2025
c3a2623
Merge branch 'capri-xiyue/v1-inference-pool' into capri-xiyue/v1-epp
capri-xiyue Jul 15, 2025
df3063b
change to install scheme
capri-xiyue Jul 15, 2025
6f8cb64
fixed import
capri-xiyue Jul 16, 2025
04588ee
change back to main image
capri-xiyue Jul 16, 2025
aebc033
Merge branch 'capri-xiyue/v1-inference-pool' into capri-xiyue/v1-epp
capri-xiyue Jul 17, 2025
2598faa
Merge branch 'capri-xiyue/v1-inference-pool' into capri-xiyue/v1-epp
capri-xiyue Jul 17, 2025
8ef6278
Merge branch 'capri-xiyue/v1-inference-pool' into capri-xiyue/v1-epp
capri-xiyue Jul 18, 2025
3a5d807
Add an "Implementing a Compatible Data Plane" section to the implemen…
AndresGuedez Jul 18, 2025
6cf8f31
feat(flowcontrol): Implement registry shard (#1187)
LukeAVanDrie Jul 18, 2025
9ac2a4d
feat(flowcontrol): refine types and docs (#1191)
LukeAVanDrie Jul 21, 2025
d9c5c27
docs: update to use kebab-cased flags changed at #1177 (#1193)
nekomeowww Jul 21, 2025
32ad5bb
added graceful shutdown when scheduler config is not initialized (#1198)
nirrozenbaum Jul 21, 2025
1ea9959
feat: move x-k8s to apix and add v1 InferencePool to api/v1 (#1116)
capri-xiyue Jul 21, 2025
a42c53f
resolve merge conflicts
capri-xiyue Jul 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions conformance/conformance.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ import (
// _ "sigs.k8s.io/gateway-api-inference-extension/conformance/tests/model_routing"

// Import the Inference Extension API types
inferencev1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
inferencev1alpha2 "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
inferenceconfig "sigs.k8s.io/gateway-api-inference-extension/conformance/utils/config"
)
Expand Down Expand Up @@ -132,6 +133,8 @@ func DefaultOptions(t *testing.T) confsuite.ConformanceOptions {
// Register Inference Extension API types
t.Logf("Attempting to install inferencev1alpha2 types into scheme from package: %s", inferencev1alpha2.GroupName)
require.NoError(t, inferencev1alpha2.Install(scheme), "failed to install inferencev1alpha2 types into scheme")
t.Logf("Attempting to install inferencev1 types into scheme from package: %s", inferencev1.GroupName)
require.NoError(t, inferencev1.Install(scheme), "failed to install inferencev1 types into scheme")

clientOptions := client.Options{Scheme: scheme}
c, err := client.New(cfg, clientOptions)
Expand Down
11 changes: 7 additions & 4 deletions conformance/resources/manifests/manifests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ spec:
fieldPath: status.podIP
---
# --- Primary InferencePool Definition ---
apiVersion: inference.networking.x-k8s.io/v1alpha2
apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: primary-inference-pool
Expand Down Expand Up @@ -196,7 +196,7 @@ spec:
terminationGracePeriodSeconds: 130
containers:
- name: epp
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
image: us-west1-docker.pkg.dev/xiyue-gke-dev-new/k8s-dev-images/epp:f1f87e9
imagePullPolicy: Always
args:
- -poolName
Expand Down Expand Up @@ -239,7 +239,7 @@ spec:
name: plugins-config
---
# --- Secondary InferencePool Definition ---
apiVersion: inference.networking.x-k8s.io/v1alpha2
apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: secondary-inference-pool
Expand Down Expand Up @@ -290,7 +290,7 @@ spec:
terminationGracePeriodSeconds: 130
containers:
- name: epp
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
image: us-west1-docker.pkg.dev/xiyue-gke-dev-new/k8s-dev-images/epp:f1f87e9
imagePullPolicy: Always
args:
- -poolName
Expand Down Expand Up @@ -363,6 +363,9 @@ rules:
- apiGroups: ["inference.networking.x-k8s.io"]
resources: ["inferencemodels", "inferencepools"]
verbs: ["get", "list", "watch"]
- apiGroups: ["inference.networking.k8s.io"]
resources: ["inferencepools"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "list", "watch"]
Expand Down
2 changes: 1 addition & 1 deletion conformance/tests/basic/epp_unavailable_fail_open.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ spec:
- "secondary.example.com"
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: secondary-inference-pool # Use secondary-inferencePool because it has failureMode set to failOpen
matches:
Expand Down
2 changes: 1 addition & 1 deletion conformance/tests/basic/gateway_following_epp_routing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ spec:
- "primary.example.com"
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: primary-inference-pool
matches:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ spec:
sectionName: http
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: non-existent-inference-pool # Intentionally Non-Existing
matches:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ spec:
- "primary.example.com"
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: primary-inference-pool
matches:
Expand All @@ -35,7 +35,7 @@ spec:
- "secondary.example.com"
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: secondary-inference-pool
matches:
Expand Down
2 changes: 1 addition & 1 deletion conformance/tests/basic/inferencepool_accepted.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ spec:
sectionName: http
rules:
- backendRefs:
- group: inference.networking.x-k8s.io # InferencePool API group
- group: inference.networking.k8s.io # InferencePool API group
kind: InferencePool
name: primary-inference-pool # Name of the InferencePool this route points to
# namespace: gateway-conformance-app-backend - is omitted since it is in the same namespace as HTTPRoute
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ spec:
- "port-unspecified.example.com"
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: primary-inference-pool
# Port is intentionally unspecified here
Expand All @@ -42,7 +42,7 @@ spec:
- "port-matching.example.com"
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: primary-inference-pool
port: 3000 # Port matches InferencePool's targetPortNumber
Expand All @@ -68,7 +68,7 @@ spec:
- "port-non-matching.example.com"
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: primary-inference-pool
port: 8888 # Port does NOT match InferencePool's targetPortNumber
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
inferenceapi "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
inferenceapi "sigs.k8s.io/gateway-api-inference-extension/api/v1"
gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"
"sigs.k8s.io/gateway-api/conformance/utils/kubernetes"
"sigs.k8s.io/gateway-api/conformance/utils/suite"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
apiVersion: inference.networking.x-k8s.io/v1alpha2
apiVersion: inference.networking.k8s.io/v1
kind: InferencePool
metadata:
name: pool-with-invalid-epp
Expand All @@ -23,7 +23,7 @@ spec:
- backendRefs:
- name: pool-with-invalid-epp
kind: InferencePool
group: inference.networking.x-k8s.io
group: inference.networking.k8s.io
matches:
- path:
type: PathPrefix
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ spec:
backendRefs:
- name: primary-inference-pool
kind: InferencePool
group: inference.networking.x-k8s.io
group: inference.networking.k8s.io
- matches:
- path:
type: PathPrefix
value: /secondary
backendRefs:
- name: secondary-inference-pool
kind: InferencePool
group: inference.networking.x-k8s.io
group: inference.networking.k8s.io
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ spec:
- "primary.example.com"
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: primary-inference-pool
matches:
Expand All @@ -45,7 +45,7 @@ spec:
- "secondary.example.com"
rules:
- backendRefs:
- group: inference.networking.x-k8s.io
- group: inference.networking.k8s.io
kind: InferencePool
name: primary-inference-pool
matches:
Expand Down
2 changes: 1 addition & 1 deletion conformance/utils/kubernetes/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
"sigs.k8s.io/controller-runtime/pkg/client"

inferenceapi "sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
inferenceapi "sigs.k8s.io/gateway-api-inference-extension/api/v1"
"sigs.k8s.io/gateway-api-inference-extension/conformance/utils/config"
gatewayv1 "sigs.k8s.io/gateway-api/apis/v1"
gatewayapiconfig "sigs.k8s.io/gateway-api/conformance/utils/config"
Expand Down
4 changes: 2 additions & 2 deletions pkg/epp/backend/metrics/logger.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (

"github.com/go-logr/logr"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)
Expand All @@ -36,7 +36,7 @@ const (
)

type Datastore interface {
PoolGet() (*v1alpha2.InferencePool, error)
PoolGet() (*v1.InferencePool, error)
// PodMetrics operations
// PodGetAll returns all pods and metrics, including fresh and stale.
PodGetAll() []PodMetrics
Expand Down
6 changes: 3 additions & 3 deletions pkg/epp/backend/metrics/pod_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
)

var (
Expand Down Expand Up @@ -86,8 +86,8 @@ func TestMetricsRefresh(t *testing.T) {

type fakeDataStore struct{}

func (f *fakeDataStore) PoolGet() (*v1alpha2.InferencePool, error) {
return &v1alpha2.InferencePool{Spec: v1alpha2.InferencePoolSpec{TargetPortNumber: 8000}}, nil
func (f *fakeDataStore) PoolGet() (*v1.InferencePool, error) {
return &v1.InferencePool{Spec: v1.InferencePoolSpec{TargetPortNumber: 8000}}, nil
}
func (f *fakeDataStore) PodGetAll() []PodMetrics {
// Not implemented.
Expand Down
2 changes: 2 additions & 0 deletions pkg/epp/controller/inferencemodel_reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
"sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
Expand Down Expand Up @@ -181,6 +182,7 @@ func TestInferenceModelReconciler(t *testing.T) {
scheme := runtime.NewScheme()
_ = clientgoscheme.AddToScheme(scheme)
_ = v1alpha2.Install(scheme)
_ = v1.Install(scheme)
initObjs := []client.Object{}
if test.model != nil {
initObjs = append(initObjs, test.model)
Expand Down
6 changes: 3 additions & 3 deletions pkg/epp/controller/inferencepool_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)
Expand All @@ -44,7 +44,7 @@ func (c *InferencePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reques

logger.Info("Reconciling InferencePool")

infPool := &v1alpha2.InferencePool{}
infPool := &v1.InferencePool{}

if err := c.Get(ctx, req.NamespacedName, infPool); err != nil {
if errors.IsNotFound(err) {
Expand All @@ -70,6 +70,6 @@ func (c *InferencePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reques

func (c *InferencePoolReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&v1alpha2.InferencePool{}).
For(&v1.InferencePool{}).
Complete(c)
}
11 changes: 7 additions & 4 deletions pkg/epp/controller/inferencepool_reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ import (
"testing"
"time"

v1 "sigs.k8s.io/gateway-api-inference-extension/api/v1"
"sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
corev1 "k8s.io/api/core/v1"
Expand All @@ -30,7 +33,6 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/gateway-api-inference-extension/apix/v1alpha2"
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
utiltest "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/testing"
Expand Down Expand Up @@ -78,6 +80,7 @@ func TestInferencePoolReconciler(t *testing.T) {
scheme := runtime.NewScheme()
_ = clientgoscheme.AddToScheme(scheme)
_ = v1alpha2.Install(scheme)
_ = v1.Install(scheme)

// Create a fake client with the pool and the pods.
initialObjects := []client.Object{pool1, pool2}
Expand Down Expand Up @@ -106,11 +109,11 @@ func TestInferencePoolReconciler(t *testing.T) {
t.Errorf("Unexpected diff (+got/-want): %s", diff)
}

newPool1 := &v1alpha2.InferencePool{}
newPool1 := &v1.InferencePool{}
if err := fakeClient.Get(ctx, req.NamespacedName, newPool1); err != nil {
t.Errorf("Unexpected pool get error: %v", err)
}
newPool1.Spec.Selector = map[v1alpha2.LabelKey]v1alpha2.LabelValue{"app": "vllm_v2"}
newPool1.Spec.Selector = map[v1.LabelKey]v1.LabelValue{"app": "vllm_v2"}
if err := fakeClient.Update(ctx, newPool1, &client.UpdateOptions{}); err != nil {
t.Errorf("Unexpected pool update error: %v", err)
}
Expand Down Expand Up @@ -153,7 +156,7 @@ func TestInferencePoolReconciler(t *testing.T) {
}

type diffStoreParams struct {
wantPool *v1alpha2.InferencePool
wantPool *v1.InferencePool
wantPods []string
wantModels []*v1alpha2.InferenceModel
}
Expand Down
Loading