Skip to content

Commit e8c3a9c

Browse files
authored
* Removed constructor that used aliased field

  Signed-off-by: Shmuel Kallner <kallner@il.ibm.com>

* Removed references to aliased field

  Signed-off-by: Shmuel Kallner <kallner@il.ibm.com>

---------

Signed-off-by: Shmuel Kallner <kallner@il.ibm.com>
1 parent c3e4412 commit e8c3a9c

18 files changed

Lines changed: 86 additions & 100 deletions

pkg/epp/backend/metrics/metrics_state.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,5 @@ import (
2020
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
2121
)
2222

23-
// NewMetricsState initializes a new MetricsState and returns its pointer.
24-
func NewMetricsState() *MetricsState {
25-
return datalayer.NewMetrics()
26-
}
27-
2823
// MetricsState holds the latest state of the metrics that were scraped from a pod.
2924
type MetricsState = datalayer.Metrics

pkg/epp/backend/metrics/types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ func (f *PodMetricsFactory) NewEndpoint(parentCtx context.Context, metadata *dat
6363
logger: log.FromContext(parentCtx).WithValues("endpoint", metadata.NamespacedName),
6464
}
6565
pm.metadata.Store(metadata)
66-
pm.metrics.Store(NewMetricsState())
66+
pm.metrics.Store(datalayer.NewMetrics())
6767

6868
pm.startRefreshLoop(parentCtx)
6969
return pm

pkg/epp/datastore/datastore_test.go

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ var (
233233
Name: "pod1",
234234
},
235235
}
236-
pod1Metrics = &backendmetrics.MetricsState{
236+
pod1Metrics = &datalayer.Metrics{
237237
WaitingQueueSize: 0,
238238
KVCacheUsagePercent: 0.2,
239239
MaxActiveModels: 2,
@@ -248,7 +248,7 @@ var (
248248
Name: "pod2",
249249
},
250250
}
251-
pod2Metrics = &backendmetrics.MetricsState{
251+
pod2Metrics = &datalayer.Metrics{
252252
WaitingQueueSize: 1,
253253
KVCacheUsagePercent: 0.2,
254254
MaxActiveModels: 2,
@@ -280,41 +280,41 @@ var (
280280
func TestMetrics(t *testing.T) {
281281
tests := []struct {
282282
name string
283-
metrics map[types.NamespacedName]*backendmetrics.MetricsState
283+
metrics map[types.NamespacedName]*datalayer.Metrics
284284
err map[types.NamespacedName]error
285285
storePods []*corev1.Pod
286-
want []*backendmetrics.MetricsState
286+
want []*datalayer.Metrics
287287
predict func(backendmetrics.PodMetrics) bool
288288
}{
289289
{
290290
name: "Probing metrics success",
291-
metrics: map[types.NamespacedName]*backendmetrics.MetricsState{
291+
metrics: map[types.NamespacedName]*datalayer.Metrics{
292292
pod1NamespacedName: pod1Metrics,
293293
pod2NamespacedName: pod2Metrics,
294294
},
295295
storePods: []*corev1.Pod{pod1, pod2},
296-
want: []*backendmetrics.MetricsState{pod1Metrics, pod2Metrics},
296+
want: []*datalayer.Metrics{pod1Metrics, pod2Metrics},
297297
},
298298
{
299299
name: "Only pods in are probed",
300-
metrics: map[types.NamespacedName]*backendmetrics.MetricsState{
300+
metrics: map[types.NamespacedName]*datalayer.Metrics{
301301
pod1NamespacedName: pod1Metrics,
302302
pod2NamespacedName: pod2Metrics,
303303
},
304304
storePods: []*corev1.Pod{pod1},
305-
want: []*backendmetrics.MetricsState{pod1Metrics},
305+
want: []*datalayer.Metrics{pod1Metrics},
306306
},
307307
{
308308
name: "Probing metrics error",
309309
err: map[types.NamespacedName]error{
310310
pod2NamespacedName: errors.New("injected error"),
311311
},
312-
metrics: map[types.NamespacedName]*backendmetrics.MetricsState{
312+
metrics: map[types.NamespacedName]*datalayer.Metrics{
313313
pod1NamespacedName: pod1Metrics,
314314
pod2NamespacedName: pod2Metrics,
315315
},
316316
storePods: []*corev1.Pod{pod1, pod2},
317-
want: []*backendmetrics.MetricsState{pod1Metrics,
317+
want: []*datalayer.Metrics{pod1Metrics,
318318
// Failed to fetch pod2 metrics so it remains the default values.
319319
{
320320
ActiveModels: map[string]int{},
@@ -354,11 +354,11 @@ func TestMetrics(t *testing.T) {
354354
}
355355
assert.EventuallyWithT(t, func(t *assert.CollectT) {
356356
got := ds.PodList(test.predict)
357-
metrics := []*backendmetrics.MetricsState{}
357+
metrics := []*datalayer.Metrics{}
358358
for _, one := range got {
359359
metrics = append(metrics, one.GetMetrics())
360360
}
361-
diff := cmp.Diff(test.want, metrics, cmpopts.IgnoreFields(backendmetrics.MetricsState{}, "UpdateTime"), cmpopts.SortSlices(func(a, b *backendmetrics.MetricsState) bool {
361+
diff := cmp.Diff(test.want, metrics, cmpopts.IgnoreFields(datalayer.Metrics{}, "UpdateTime"), cmpopts.SortSlices(func(a, b *datalayer.Metrics) bool {
362362
return a.String() < b.String()
363363
}))
364364
assert.Equal(t, "", diff, "Unexpected diff (+got/-want)")

pkg/epp/metrics/collectors/inference_pool_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ var (
4343
},
4444
}
4545
pod1NamespacedName = types.NamespacedName{Name: pod1.Name + "-rank-0", Namespace: pod1.Namespace}
46-
pod1Metrics = &backendmetrics.MetricsState{
46+
pod1Metrics = &datalayer.Metrics{
4747
WaitingQueueSize: 100,
4848
KVCacheUsagePercent: 0.2,
4949
MaxActiveModels: 2,
@@ -70,7 +70,7 @@ func TestNoMetricsCollected(t *testing.T) {
7070
}
7171

7272
func TestMetricsCollected(t *testing.T) {
73-
metrics := map[types.NamespacedName]*backendmetrics.MetricsState{
73+
metrics := map[types.NamespacedName]*datalayer.Metrics{
7474
pod1NamespacedName: pod1Metrics,
7575
}
7676
period := time.Millisecond

pkg/epp/requestcontrol/director.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -271,9 +271,9 @@ func (d *Director) toSchedulerPodMetrics(pods []backendmetrics.PodMetrics) []sch
271271
pm := make([]schedulingtypes.Endpoint, len(pods))
272272
for i, pod := range pods {
273273
if pod.GetAttributes() != nil {
274-
pm[i] = &schedulingtypes.PodMetrics{EndpointMetadata: pod.GetMetadata().Clone(), MetricsState: pod.GetMetrics().Clone(), AttributeMap: pod.GetAttributes().Clone()}
274+
pm[i] = &schedulingtypes.PodMetrics{EndpointMetadata: pod.GetMetadata().Clone(), Metrics: pod.GetMetrics().Clone(), AttributeMap: pod.GetAttributes().Clone()}
275275
} else {
276-
pm[i] = &schedulingtypes.PodMetrics{EndpointMetadata: pod.GetMetadata().Clone(), MetricsState: pod.GetMetrics().Clone(), AttributeMap: datalayer.NewAttributes()}
276+
pm[i] = &schedulingtypes.PodMetrics{EndpointMetadata: pod.GetMetadata().Clone(), Metrics: pod.GetMetrics().Clone(), AttributeMap: datalayer.NewAttributes()}
277277
}
278278
}
279279

pkg/epp/scheduling/framework/plugins/multi/prefix/plugin_test.go

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ import (
2727
"github.com/stretchr/testify/assert"
2828
k8stypes "k8s.io/apimachinery/pkg/types"
2929

30-
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
3130
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
3231
dplugins "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer/plugins/approximateprefix"
3332
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
@@ -46,9 +45,9 @@ func TestPrefixPluginCompletion(t *testing.T) {
4645
}
4746
plugin := New(context.Background(), config)
4847

49-
endpoint1 := &types.PodMetrics{EndpointMetadata: &datalayer.EndpointMetadata{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}, MetricsState: backendmetrics.NewMetricsState()}
50-
endpoint2 := &types.PodMetrics{EndpointMetadata: &datalayer.EndpointMetadata{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}, MetricsState: backendmetrics.NewMetricsState()}
51-
endpoint3 := &types.PodMetrics{EndpointMetadata: &datalayer.EndpointMetadata{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}}, MetricsState: backendmetrics.NewMetricsState()}
48+
endpoint1 := &types.PodMetrics{EndpointMetadata: &datalayer.EndpointMetadata{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}, Metrics: datalayer.NewMetrics()}
49+
endpoint2 := &types.PodMetrics{EndpointMetadata: &datalayer.EndpointMetadata{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}, Metrics: datalayer.NewMetrics()}
50+
endpoint3 := &types.PodMetrics{EndpointMetadata: &datalayer.EndpointMetadata{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}}, Metrics: datalayer.NewMetrics()}
5251
endpoints := []types.Endpoint{endpoint1, endpoint2, endpoint3}
5352

5453
// First request.
@@ -215,7 +214,7 @@ func TestPrefixPluginChatCompletions(t *testing.T) {
215214
}
216215
plugin := New(context.Background(), config)
217216

218-
endpoint1 := &types.PodMetrics{EndpointMetadata: &datalayer.EndpointMetadata{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}, MetricsState: &backendmetrics.MetricsState{}}
217+
endpoint1 := &types.PodMetrics{EndpointMetadata: &datalayer.EndpointMetadata{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}, Metrics: &datalayer.Metrics{}}
219218
endpoints := []types.Endpoint{endpoint1}
220219

221220
// Test with chat completions request
@@ -249,8 +248,8 @@ func TestPrefixPluginChatCompletionsGrowth(t *testing.T) {
249248
}
250249
plugin := New(context.Background(), config)
251250

252-
endpoint1 := &types.PodMetrics{EndpointMetadata: &datalayer.EndpointMetadata{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}, MetricsState: &backendmetrics.MetricsState{}}
253-
endpoint2 := &types.PodMetrics{EndpointMetadata: &datalayer.EndpointMetadata{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}, MetricsState: &backendmetrics.MetricsState{}}
251+
endpoint1 := &types.PodMetrics{EndpointMetadata: &datalayer.EndpointMetadata{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}, Metrics: &datalayer.Metrics{}}
252+
endpoint2 := &types.PodMetrics{EndpointMetadata: &datalayer.EndpointMetadata{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}, Metrics: &datalayer.Metrics{}}
254253
endpoints := []types.Endpoint{endpoint1, endpoint2}
255254

256255
// First request with initial conversation
@@ -479,7 +478,7 @@ func TestPrefixPluginAutoTune(t *testing.T) {
479478
podName := "pod-autotune"
480479
endpoint := &types.PodMetrics{
481480
EndpointMetadata: &datalayer.EndpointMetadata{NamespacedName: k8stypes.NamespacedName{Name: podName}},
482-
MetricsState: &backendmetrics.MetricsState{
481+
Metrics: &datalayer.Metrics{
483482
CacheBlockSize: 16, // 16 tokens * 4 chars/token = 64 chars per block
484483
CacheNumGPUBlocks: 1000, // 1000 blocks capacity
485484
},
@@ -587,8 +586,8 @@ func TestPrepareRequestData(t *testing.T) {
587586
}
588587
plugin := New(context.Background(), config)
589588

590-
endpoint1 := &types.PodMetrics{EndpointMetadata: &datalayer.EndpointMetadata{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}, MetricsState: backendmetrics.NewMetricsState(), AttributeMap: datalayer.NewAttributes()}
591-
endpoint2 := &types.PodMetrics{EndpointMetadata: &datalayer.EndpointMetadata{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}, MetricsState: backendmetrics.NewMetricsState(), AttributeMap: datalayer.NewAttributes()}
589+
endpoint1 := &types.PodMetrics{EndpointMetadata: &datalayer.EndpointMetadata{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}, Metrics: datalayer.NewMetrics(), AttributeMap: datalayer.NewAttributes()}
590+
endpoint2 := &types.PodMetrics{EndpointMetadata: &datalayer.EndpointMetadata{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}, Metrics: datalayer.NewMetrics(), AttributeMap: datalayer.NewAttributes()}
592591
endpoints := []types.Endpoint{endpoint1, endpoint2}
593592

594593
// First request to populate cache.

pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/latencypredictor_helper.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import (
2525
"time"
2626

2727
"sigs.k8s.io/controller-runtime/pkg/log"
28-
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
28+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
2929
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
3030

3131
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
@@ -49,7 +49,7 @@ func refreshLastSeenMetrics(ctx context.Context, sloCtx *sloRequestContext) {
4949
}
5050

5151
// getLatestMetricsForProfile retrieves the latest metrics for prediction from sloCtx.LastSeenMetrics.
52-
func getLatestMetricsForProfile(sloCtx *sloRequestContext) (*backendmetrics.MetricsState, error) {
52+
func getLatestMetricsForProfile(sloCtx *sloRequestContext) (*datalayer.Metrics, error) {
5353
if len(sloCtx.lastSeenMetrics) == 0 {
5454
return nil, errors.New("no last seen metrics available for prediction")
5555
}
@@ -164,7 +164,7 @@ func recordTTFTTrainingData(
164164
ctx context.Context,
165165
predictor latencypredictor.PredictorInterface,
166166
sloCtx *sloRequestContext,
167-
m *backendmetrics.MetricsState,
167+
m *datalayer.Metrics,
168168
now time.Time,
169169
prefixCacheScore float64,
170170
) {
@@ -311,7 +311,7 @@ func processTokenForLatencyPrediction(
311311
func bulkPredictWithMetrics(
312312
ctx context.Context,
313313
predictor latencypredictor.PredictorInterface,
314-
metricsStates []*backendmetrics.MetricsState,
314+
metricsStates []*datalayer.Metrics,
315315
prompts []string,
316316
generatedTokenCounts []int,
317317
prefixCacheScores []float64,

pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/latencypredictor_helper_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import (
2323
"testing"
2424

2525
"github.com/stretchr/testify/assert"
26-
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
26+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
2727
latencypredictor "sigs.k8s.io/gateway-api-inference-extension/sidecars/latencypredictorasync"
2828
)
2929

@@ -35,7 +35,7 @@ func TestBulkPredictWithMetrics(t *testing.T) {
3535
},
3636
}
3737

38-
metricsStates := []*backendmetrics.MetricsState{
38+
metricsStates := []*datalayer.Metrics{
3939
{KVCacheUsagePercent: 0.5},
4040
{KVCacheUsagePercent: 0.6},
4141
}
@@ -58,7 +58,7 @@ func TestBulkPredictWithMetrics_Error(t *testing.T) {
5858
err: errors.New("prediction failed"),
5959
}
6060

61-
metricsStates := []*backendmetrics.MetricsState{
61+
metricsStates := []*datalayer.Metrics{
6262
{KVCacheUsagePercent: 0.5},
6363
}
6464
prompts := []string{"prompt1"}
@@ -73,7 +73,7 @@ func TestBulkPredictWithMetrics_Error(t *testing.T) {
7373

7474
func TestBulkPredictWithMetrics_InputMismatch(t *testing.T) {
7575
mockPredictor := &mockPredictor{}
76-
metricsStates := []*backendmetrics.MetricsState{{}}
76+
metricsStates := []*datalayer.Metrics{{}}
7777
prompts := []string{"prompt1", "prompt2"} // Mismatch length
7878
generatedTokenCounts := []int{1}
7979
prefixCacheScores := []float64{0.0}
@@ -87,7 +87,7 @@ func TestBulkPredictWithMetrics_InputMismatch(t *testing.T) {
8787

8888
func TestBulkPredictWithMetrics_NilMetricsState(t *testing.T) {
8989
mockPredictor := &mockPredictor{}
90-
metricsStates := []*backendmetrics.MetricsState{nil} // Nil metrics state
90+
metricsStates := []*datalayer.Metrics{nil} // Nil metrics state
9191
prompts := []string{"prompt1"}
9292
generatedTokenCounts := []int{1}
9393
prefixCacheScores := []float64{0.0}

pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/prediction.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import (
2121
"context"
2222

2323
"sigs.k8s.io/controller-runtime/pkg/log"
24-
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
24+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
2525
schedulingtypes "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
2626
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
2727
latencypredictor "sigs.k8s.io/gateway-api-inference-extension/sidecars/latencypredictorasync"
@@ -46,7 +46,7 @@ func (s *SLOAwareRouter) generatePredictions(ctx context.Context, request *sched
4646
predictions := make([]endpointPredictionResult, 0, len(candidateEndpoints))
4747

4848
// Prepare inputs for bulk prediction
49-
metricsStates := make([]*backendmetrics.MetricsState, len(candidateEndpoints))
49+
metricsStates := make([]*datalayer.Metrics, len(candidateEndpoints))
5050
prompts := make([]string, len(candidateEndpoints))
5151
generatedTokenCounts := make([]int, len(candidateEndpoints))
5252
prefixCacheScores := make([]float64, len(candidateEndpoints))

pkg/epp/scheduling/framework/plugins/multi/slo_aware_router/requestcontrol_hooks.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ import (
2626
"sigs.k8s.io/controller-runtime/pkg/log"
2727

2828
"k8s.io/apimachinery/pkg/types"
29-
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
3029
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datalayer"
3130
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
3231
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/requestcontrol"
@@ -44,7 +43,7 @@ type sloRequestContext struct {
4443
schedulingRequest schedulingtypes.LLMRequest
4544
targetMetadata *datalayer.EndpointMetadata
4645
schedulingResult *schedulingtypes.SchedulingResult
47-
lastSeenMetrics map[string]*backendmetrics.MetricsState
46+
lastSeenMetrics map[string]*datalayer.Metrics
4847
lastTokenTimestamp time.Time
4948
requestReceivedTimestamp time.Time
5049
generatedTokenCount int
@@ -74,7 +73,7 @@ type sloRequestContext struct {
7473
func newSLORequestContext(request *schedulingtypes.LLMRequest) *sloRequestContext {
7574
return &sloRequestContext{
7675
schedulingRequest: *request,
77-
lastSeenMetrics: make(map[string]*backendmetrics.MetricsState),
76+
lastSeenMetrics: make(map[string]*datalayer.Metrics),
7877
prefixCacheScoresForEndpoints: make(map[string]float64),
7978
predictionsForScheduling: make([]endpointPredictionResult, 0),
8079
}

0 commit comments

Comments
 (0)