diff --git a/pkg/epp/metrics/metrics.go b/pkg/epp/metrics/metrics.go index 4e9e95ef9..acb2f0304 100644 --- a/pkg/epp/metrics/metrics.go +++ b/pkg/epp/metrics/metrics.go @@ -182,28 +182,17 @@ var ( }, []string{}, ) - SchedulerPluginProcessingLatencies = prometheus.NewHistogramVec( - prometheus.HistogramOpts{ - Subsystem: InferenceExtension, - Name: "scheduler_plugin_duration_seconds", - Help: metricsutil.HelpMsgWithStability("Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name.", compbasemetrics.ALPHA), - Buckets: []float64{ - 0.0001, 0.0002, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, - }, - }, - []string{"plugin_type", "plugin_name"}, - ) - RequestControlPluginProcessingLatencies = prometheus.NewHistogramVec( + PluginProcessingLatencies = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Subsystem: InferenceExtension, - Name: "request_control_plugin_duration_seconds", - Help: metricsutil.HelpMsgWithStability("RequestControl plugin processing latency distribution in seconds for each plugin type and plugin name.", compbasemetrics.ALPHA), + Name: "plugin_duration_seconds", + Help: metricsutil.HelpMsgWithStability("Plugin processing latency distribution in seconds for each extension point, plugin type and plugin name.", compbasemetrics.ALPHA), Buckets: []float64{ 0.0001, 0.0002, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, }, }, - []string{"plugin_type", "plugin_name"}, + []string{"extension_point", "plugin_type", "plugin_name"}, ) // Prefix indexer Metrics @@ -265,9 +254,8 @@ func Register(customCollectors ...prometheus.Collector) { metrics.Registry.MustRegister(inferencePoolAvgKVCache) metrics.Registry.MustRegister(inferencePoolAvgQueueSize) metrics.Registry.MustRegister(inferencePoolReadyPods) - metrics.Registry.MustRegister(SchedulerPluginProcessingLatencies) metrics.Registry.MustRegister(SchedulerE2ELatency) - metrics.Registry.MustRegister(RequestControlPluginProcessingLatencies) + metrics.Registry.MustRegister(PluginProcessingLatencies) metrics.Registry.MustRegister(InferenceExtensionInfo) metrics.Registry.MustRegister(PrefixCacheSize) metrics.Registry.MustRegister(PrefixCacheHitRatio) @@ -292,9 +280,8 @@ func Reset() { inferencePoolAvgKVCache.Reset() inferencePoolAvgQueueSize.Reset() inferencePoolReadyPods.Reset() - SchedulerPluginProcessingLatencies.Reset() SchedulerE2ELatency.Reset() - RequestControlPluginProcessingLatencies.Reset() + PluginProcessingLatencies.Reset() InferenceExtensionInfo.Reset() PrefixCacheSize.Reset() PrefixCacheHitRatio.Reset() @@ -396,19 +383,14 @@ func RecordInferencePoolReadyPods(name string, runningPods float64) { inferencePoolReadyPods.WithLabelValues(name).Set(runningPods) } -// RecordSchedulerPluginProcessingLatency records the processing latency for a scheduler plugin. -func RecordSchedulerPluginProcessingLatency(pluginType, pluginName string, duration time.Duration) { - SchedulerPluginProcessingLatencies.WithLabelValues(pluginType, pluginName).Observe(duration.Seconds()) -} - // RecordSchedulerE2ELatency records the end-to-end scheduling latency. func RecordSchedulerE2ELatency(duration time.Duration) { SchedulerE2ELatency.WithLabelValues().Observe(duration.Seconds()) } -// RecordRequestControlPluginProcessingLatency records the processing latency for a request-control plugin. -func RecordRequestControlPluginProcessingLatency(pluginType, pluginName string, duration time.Duration) { - RequestControlPluginProcessingLatencies.WithLabelValues(pluginType, pluginName).Observe(duration.Seconds()) +// RecordPluginProcessingLatency records the processing latency for a plugin. +func RecordPluginProcessingLatency(extensionPoint, pluginType, pluginName string, duration time.Duration) { + PluginProcessingLatencies.WithLabelValues(extensionPoint, pluginType, pluginName).Observe(duration.Seconds()) } // RecordPrefixCacheSize records the size of the prefix indexer in megabytes. diff --git a/pkg/epp/metrics/metrics_test.go b/pkg/epp/metrics/metrics_test.go index d2e209bd1..f16183d3d 100644 --- a/pkg/epp/metrics/metrics_test.go +++ b/pkg/epp/metrics/metrics_test.go @@ -559,11 +559,12 @@ func TestInferencePoolMetrics(t *testing.T) { } } -func TestSchedulerPluginProcessingLatencies(t *testing.T) { +func TestPluginProcessingLatencies(t *testing.T) { type pluginLatency struct { - pluginType string - pluginName string - duration time.Duration + extensionPoint string + pluginType string + pluginName string + duration time.Duration } scenarios := []struct { name string @@ -573,24 +574,28 @@ func TestSchedulerPluginProcessingLatencies(t *testing.T) { name: "multiple plugins", latencies: []pluginLatency{ { - pluginType: "PostSchedule", - pluginName: "PluginB", - duration: 200 * time.Millisecond, + extensionPoint: "ProfilePicker", + pluginType: "ProfileHandler", + pluginName: "PluginB", + duration: 200 * time.Millisecond, }, { - pluginType: "Filter", - pluginName: "PluginC", - duration: 50 * time.Millisecond, + extensionPoint: "Filter", + pluginType: "TestFilter", + pluginName: "PluginC", + duration: 50 * time.Millisecond, }, { - pluginType: "Scorer", - pluginName: "PluginD", - duration: 10 * time.Millisecond, + extensionPoint: "Scorer", + pluginType: "TestScorer", + pluginName: "PluginD", + duration: 10 * time.Millisecond, }, { - pluginType: "Picker", - pluginName: "PluginE", - duration: 10 * time.Microsecond, + extensionPoint: "Picker", + pluginType: "TestPicker", + pluginName: "PluginE", + duration: 10 * time.Microsecond, }, }, }, @@ -599,10 +604,10 @@ func TestSchedulerPluginProcessingLatencies(t *testing.T) { for _, scenario := range scenarios { t.Run(scenario.name, func(t *testing.T) { for _, latency := range scenario.latencies { - RecordSchedulerPluginProcessingLatency(latency.pluginType, latency.pluginName, latency.duration) + RecordPluginProcessingLatency(latency.extensionPoint, latency.pluginType, latency.pluginName, latency.duration) } - wantPluginLatencies, err := os.Open("testdata/scheduler_plugin_processing_latencies_metric") + wantPluginLatencies, err := os.Open("testdata/plugin_processing_latencies_metric") defer func() { if err := wantPluginLatencies.Close(); err != nil { t.Error(err) @@ -611,7 +616,7 @@ func TestSchedulerPluginProcessingLatencies(t *testing.T) { if err != nil { t.Fatal(err) } - if err := testutil.GatherAndCompare(metrics.Registry, wantPluginLatencies, "inference_extension_scheduler_plugin_duration_seconds"); err != nil { + if err := testutil.GatherAndCompare(metrics.Registry, wantPluginLatencies, "inference_extension_plugin_duration_seconds"); err != nil { t.Error(err) } }) diff --git a/pkg/epp/metrics/testdata/plugin_processing_latencies_metric b/pkg/epp/metrics/testdata/plugin_processing_latencies_metric new file mode 100644 index 000000000..98820902b --- /dev/null +++ b/pkg/epp/metrics/testdata/plugin_processing_latencies_metric @@ -0,0 +1,54 @@ +# HELP inference_extension_plugin_duration_seconds [ALPHA] Plugin processing latency distribution in seconds for each extension point, plugin type and plugin name. +# TYPE inference_extension_plugin_duration_seconds histogram +inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.0001"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.0002"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.0005"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.001"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.002"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.005"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.01"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.02"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.05"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="0.1"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler",le="+Inf"} 1 +inference_extension_plugin_duration_seconds_sum{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler"} 0.2 +inference_extension_plugin_duration_seconds_count{extension_point="ProfilePicker",plugin_name="PluginB",plugin_type="ProfileHandler"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.0001"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.0002"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.0005"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.001"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.002"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.005"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.01"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.02"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.05"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="0.1"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter",le="+Inf"} 1 +inference_extension_plugin_duration_seconds_sum{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter"} 0.05 +inference_extension_plugin_duration_seconds_count{extension_point="Filter",plugin_name="PluginC",plugin_type="TestFilter"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.0001"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.0002"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.0005"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.001"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.002"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.005"} 0 +inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.01"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.02"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.05"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="0.1"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer",le="+Inf"} 1 +inference_extension_plugin_duration_seconds_sum{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer"} 0.01 +inference_extension_plugin_duration_seconds_count{extension_point="Scorer",plugin_name="PluginD",plugin_type="TestScorer"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.0001"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.0002"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.0005"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.001"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.002"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.005"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.01"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.02"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.05"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="0.1"} 1 +inference_extension_plugin_duration_seconds_bucket{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker",le="+Inf"} 1 +inference_extension_plugin_duration_seconds_sum{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker"} 1e-05 +inference_extension_plugin_duration_seconds_count{extension_point="Picker",plugin_name="PluginE",plugin_type="TestPicker"} 1 diff --git a/pkg/epp/metrics/testdata/scheduler_plugin_processing_latencies_metric b/pkg/epp/metrics/testdata/scheduler_plugin_processing_latencies_metric deleted file mode 100644 index 38ac8a09d..000000000 --- a/pkg/epp/metrics/testdata/scheduler_plugin_processing_latencies_metric +++ /dev/null @@ -1,54 +0,0 @@ -# HELP inference_extension_scheduler_plugin_duration_seconds [ALPHA] Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name. -# TYPE inference_extension_scheduler_plugin_duration_seconds histogram -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.0001"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.0002"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.0005"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.001"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.002"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.005"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.01"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.02"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.05"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="0.1"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginB",plugin_type="PostSchedule",le="+Inf"} 1 -inference_extension_scheduler_plugin_duration_seconds_sum{plugin_name="PluginB",plugin_type="PostSchedule"} 0.2 -inference_extension_scheduler_plugin_duration_seconds_count{plugin_name="PluginB",plugin_type="PostSchedule"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.0001"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.0002"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.0005"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.001"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.002"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.005"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.01"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.02"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.05"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="0.1"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginC",plugin_type="Filter",le="+Inf"} 1 -inference_extension_scheduler_plugin_duration_seconds_sum{plugin_name="PluginC",plugin_type="Filter"} 0.05 -inference_extension_scheduler_plugin_duration_seconds_count{plugin_name="PluginC",plugin_type="Filter"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.0001"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.0002"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.0005"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.001"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.002"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.005"} 0 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.01"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.02"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.05"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="0.1"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginD",plugin_type="Scorer",le="+Inf"} 1 -inference_extension_scheduler_plugin_duration_seconds_sum{plugin_name="PluginD",plugin_type="Scorer"} 0.01 -inference_extension_scheduler_plugin_duration_seconds_count{plugin_name="PluginD",plugin_type="Scorer"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.0001"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.0002"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.0005"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.001"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.002"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.005"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.01"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.02"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.05"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="0.1"} 1 -inference_extension_scheduler_plugin_duration_seconds_bucket{plugin_name="PluginE",plugin_type="Picker",le="+Inf"} 1 -inference_extension_scheduler_plugin_duration_seconds_sum{plugin_name="PluginE",plugin_type="Picker"} 1e-05 -inference_extension_scheduler_plugin_duration_seconds_count{plugin_name="PluginE",plugin_type="Picker"} 1 diff --git a/pkg/epp/requestcontrol/director.go b/pkg/epp/requestcontrol/director.go index a80215ed5..84bb8628a 100644 --- a/pkg/epp/requestcontrol/director.go +++ b/pkg/epp/requestcontrol/director.go @@ -328,22 +328,25 @@ func RandomWeightedDraw(logger logr.Logger, model *v1alpha2.InferenceModel, seed return "" } -func (d *Director) runPreRequestPlugins(ctx context.Context, request *schedulingtypes.LLMRequest, schedulingResult *schedulingtypes.SchedulingResult, - targetPort int, -) { +func (d *Director) runPreRequestPlugins(ctx context.Context, request *schedulingtypes.LLMRequest, + schedulingResult *schedulingtypes.SchedulingResult, targetPort int) { + loggerDebug := log.FromContext(ctx).V(logutil.DEBUG) for _, plugin := range d.preRequestPlugins { - log.FromContext(ctx).V(logutil.DEBUG).Info("Running pre-request plugin", "plugin", plugin.TypedName().Type) + loggerDebug.Info("Running pre-request plugin", "plugin", plugin.TypedName()) before := time.Now() plugin.PreRequest(ctx, request, schedulingResult, targetPort) - metrics.RecordRequestControlPluginProcessingLatency(PreRequestPluginType, plugin.TypedName().Type, time.Since(before)) + metrics.RecordPluginProcessingLatency(PreRequestExtensionPoint, plugin.TypedName().Type, plugin.TypedName().Name, time.Since(before)) + loggerDebug.Info("Completed running pre-request plugin successfully", "plugin", plugin.TypedName()) } } func (d *Director) runPostResponsePlugins(ctx context.Context, request *schedulingtypes.LLMRequest, response *Response, targetPod *backend.Pod) { + loggerDebug := log.FromContext(ctx).V(logutil.DEBUG) for _, plugin := range d.postResponsePlugins { - log.FromContext(ctx).V(logutil.DEBUG).Info("Running post-response plugin", "plugin", plugin.TypedName().Type) + loggerDebug.Info("Running post-response plugin", "plugin", plugin.TypedName()) before := time.Now() plugin.PostResponse(ctx, request, response, targetPod) - metrics.RecordRequestControlPluginProcessingLatency(PostResponsePluginType, plugin.TypedName().Type, time.Since(before)) + metrics.RecordPluginProcessingLatency(PostResponseExtensionPoint, plugin.TypedName().Type, plugin.TypedName().Name, time.Since(before)) + loggerDebug.Info("Completed running post-response plugin successfully", "plugin", plugin.TypedName()) } } diff --git a/pkg/epp/requestcontrol/plugins.go b/pkg/epp/requestcontrol/plugins.go index ba51c2afb..ca823a670 100644 --- a/pkg/epp/requestcontrol/plugins.go +++ b/pkg/epp/requestcontrol/plugins.go @@ -25,8 +25,8 @@ import ( ) const ( - PreRequestPluginType = "PreRequest" - PostResponsePluginType = "PostResponse" + PreRequestExtensionPoint = "PreRequest" + PostResponseExtensionPoint = "PostResponse" ) // PreRequest is called by the director after a getting result from scheduling layer and diff --git a/pkg/epp/scheduling/framework/plugins.go b/pkg/epp/scheduling/framework/plugins.go index 7e22d8618..aae362b91 100644 --- a/pkg/epp/scheduling/framework/plugins.go +++ b/pkg/epp/scheduling/framework/plugins.go @@ -24,12 +24,12 @@ import ( ) const ( - ProfilePickerType = "ProfilePicker" - FilterPluginType = "Filter" - ScorerPluginType = "Scorer" - PickerPluginType = "Picker" - PostCyclePluginType = "PostCycle" - ProcessProfilesResultsType = "ProcessProfilesResults" + ProfilePickerExtensionPoint = "ProfilePicker" + FilterExtensionPoint = "Filter" + ScorerExtensionPoint = "Scorer" + PickerExtensionPoint = "Picker" + PostCycleExtensionPoint = "PostCycle" + ProcessProfilesResultsExtensionPoint = "ProcessProfilesResults" ) // ProfileHandler defines the extension points for handling multi SchedulerProfile instances. diff --git a/pkg/epp/scheduling/framework/scheduler_profile.go b/pkg/epp/scheduling/framework/scheduler_profile.go index 0ca806d9d..107da766d 100644 --- a/pkg/epp/scheduling/framework/scheduler_profile.go +++ b/pkg/epp/scheduling/framework/scheduler_profile.go @@ -128,16 +128,16 @@ func (p *SchedulerProfile) runFilterPlugins(ctx context.Context, request *types. loggerDebug.Info("Before running filter plugins", "pods", filteredPods) for _, filter := range p.filters { - loggerDebug.Info("Running filter plugin", "plugin", filter.TypedName().Type) + loggerDebug.Info("Running filter plugin", "plugin", filter.TypedName()) before := time.Now() filteredPods = filter.Filter(ctx, cycleState, request, filteredPods) - metrics.RecordSchedulerPluginProcessingLatency(FilterPluginType, filter.TypedName().Type, time.Since(before)) - loggerDebug.Info("Filter plugin result", "plugin", filter.TypedName().Type, "pods", filteredPods) + metrics.RecordPluginProcessingLatency(FilterExtensionPoint, filter.TypedName().Type, filter.TypedName().Name, time.Since(before)) + loggerDebug.Info("Completed running filter plugin successfully", "plugin", filter.TypedName(), "pods", filteredPods) if len(filteredPods) == 0 { break } } - loggerDebug.Info("After running filter plugins") + loggerDebug.Info("Completed running filter plugins successfully") return filteredPods } @@ -152,16 +152,16 @@ func (p *SchedulerProfile) runScorerPlugins(ctx context.Context, request *types. } // Iterate through each scorer in the chain and accumulate the weighted scores. for _, scorer := range p.scorers { - loggerDebug.Info("Running scorer", "scorer", scorer.TypedName().Type) + loggerDebug.Info("Running scorer plugin", "plugin", scorer.TypedName()) before := time.Now() scores := scorer.Score(ctx, cycleState, request, pods) - metrics.RecordSchedulerPluginProcessingLatency(ScorerPluginType, scorer.TypedName().Type, time.Since(before)) + metrics.RecordPluginProcessingLatency(ScorerExtensionPoint, scorer.TypedName().Type, scorer.TypedName().Name, time.Since(before)) for pod, score := range scores { // weight is relative to the sum of weights weightedScorePerPod[pod] += score * float64(scorer.Weight()) } - loggerDebug.Info("After running scorer", "scorer", scorer.TypedName().Type) + loggerDebug.Info("Completed running scorer plugin successfully", "plugin", scorer.TypedName()) } - loggerDebug.Info("After running scorer plugins") + loggerDebug.Info("Completed running scorer plugins successfully") return weightedScorePerPod } @@ -175,20 +175,22 @@ func (p *SchedulerProfile) runPickerPlugin(ctx context.Context, cycleState *type i++ } - loggerDebug.Info("Before running picker plugin", "picker", p.picker.TypedName().Type, "pods-weighted-score", fmt.Sprint(weightedScorePerPod)) + loggerDebug.Info("Running picker plugin", "plugin", p.picker.TypedName(), "pods-weighted-score", fmt.Sprint(weightedScorePerPod)) before := time.Now() result := p.picker.Pick(ctx, cycleState, scoredPods) - metrics.RecordSchedulerPluginProcessingLatency(PickerPluginType, p.picker.TypedName().Type, time.Since(before)) - loggerDebug.Info("After running picker plugin", "picker", p.picker.TypedName().Type, "result", result) + metrics.RecordPluginProcessingLatency(PickerExtensionPoint, p.picker.TypedName().Type, p.picker.TypedName().Name, time.Since(before)) + loggerDebug.Info("Completed running picker plugin successfully", "plugin", p.picker.TypedName(), "result", result) return result } func (p *SchedulerProfile) runPostCyclePlugins(ctx context.Context, cycleState *types.CycleState, result *types.ProfileRunResult) { + loggerDebug := log.FromContext(ctx).V(logutil.DEBUG) for _, plugin := range p.postCyclePlugins { - log.FromContext(ctx).V(logutil.DEBUG).Info("Running post-cycle plugin", "plugin", plugin.TypedName().Type) + loggerDebug.Info("Running post-cycle plugin", "plugin", plugin.TypedName()) before := time.Now() plugin.PostCycle(ctx, cycleState, result) - metrics.RecordSchedulerPluginProcessingLatency(PostCyclePluginType, plugin.TypedName().Type, time.Since(before)) + metrics.RecordPluginProcessingLatency(PostCycleExtensionPoint, plugin.TypedName().Type, plugin.TypedName().Name, time.Since(before)) + loggerDebug.Info("Completed running post-cycle plugin successfully", "plugin", plugin.TypedName()) } } diff --git a/pkg/epp/scheduling/scheduler.go b/pkg/epp/scheduling/scheduler.go index a0f52ef3a..12c18833a 100644 --- a/pkg/epp/scheduling/scheduler.go +++ b/pkg/epp/scheduling/scheduler.go @@ -57,11 +57,11 @@ func (s *Scheduler) Schedule(ctx context.Context, request *types.LLMRequest, can cycleState := types.NewCycleState() for { // get the next set of profiles to run iteratively based on the request and the previous execution results - loggerDebug.Info("Running profile handler, Pick profiles", "plugin", s.profileHandler.TypedName().Type) + loggerDebug.Info("Running profile handler, Pick profiles", "plugin", s.profileHandler.TypedName()) before := time.Now() profiles := s.profileHandler.Pick(ctx, cycleState, request, s.profiles, profileRunResults) - metrics.RecordSchedulerPluginProcessingLatency(framework.ProfilePickerType, s.profileHandler.TypedName().Type, time.Since(before)) - loggerDebug.Info("After running profile handler Pick profiles", "plugin", s.profileHandler.TypedName().Type, "result", profiles) + metrics.RecordPluginProcessingLatency(framework.ProfilePickerExtensionPoint, s.profileHandler.TypedName().Type, s.profileHandler.TypedName().Name, time.Since(before)) + loggerDebug.Info("Completed running profile handler Pick profiles successfully", "plugin", s.profileHandler.TypedName(), "result", profiles) if len(profiles) == 0 { // profile picker didn't pick any profile to run break } @@ -73,7 +73,7 @@ func (s *Scheduler) Schedule(ctx context.Context, request *types.LLMRequest, can if err != nil { loggerDebug.Info("failed to run scheduler profile", "profile", name, "error", err.Error()) } else { - loggerDebug.Info("After running scheduler profile succuessfully", "name", name) + loggerDebug.Info("Completed running scheduler profile succuessfully", "name", name) } profileRunResults[name] = profileRunResult // if profile failed to run, the run result is nil @@ -84,11 +84,11 @@ func (s *Scheduler) Schedule(ctx context.Context, request *types.LLMRequest, can return nil, fmt.Errorf("failed to run any scheduler profile for request %s", request.RequestId) } - loggerDebug.Info("Running profile handler, ProcessResults", "plugin", s.profileHandler.TypedName().Type) + loggerDebug.Info("Running profile handler, ProcessResults", "plugin", s.profileHandler.TypedName()) before := time.Now() result, err := s.profileHandler.ProcessResults(ctx, cycleState, request, profileRunResults) - metrics.RecordSchedulerPluginProcessingLatency(framework.ProcessProfilesResultsType, s.profileHandler.TypedName().Type, time.Since(before)) - loggerDebug.Info("After running profile handler ProcessResults", "plugin", s.profileHandler.TypedName().Type) + metrics.RecordPluginProcessingLatency(framework.ProcessProfilesResultsExtensionPoint, s.profileHandler.TypedName().Type, s.profileHandler.TypedName().Name, time.Since(before)) + loggerDebug.Info("Completed running profile handler ProcessResults successfully", "plugin", s.profileHandler.TypedName()) return result, err }