From 81a06efed98eedfb6d0b3c98b1f80c5f2da0e9ee Mon Sep 17 00:00:00 2001 From: yafengio Date: Tue, 8 Jul 2025 16:45:49 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=F0=9F=94=A7=20unify=20the=20default=20?= =?UTF-8?q?parameter=20value=20positions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cmd/epp/runner/runner.go | 53 ++++++++++++++++++++++++++----------- pkg/epp/server/runserver.go | 9 ++++++- 2 files changed, 45 insertions(+), 17 deletions(-) diff --git a/cmd/epp/runner/runner.go b/cmd/epp/runner/runner.go index 45acf1a95..fee047ffd 100644 --- a/cmd/epp/runner/runner.go +++ b/cmd/epp/runner/runner.go @@ -64,10 +64,12 @@ var ( "The gRPC port used for communicating with Envoy proxy") grpcHealthPort = flag.Int( "grpcHealthPort", - 9003, + runserver.DefaultGrpcHealthPort, "The port used for gRPC liveness and readiness probes") metricsPort = flag.Int( - "metricsPort", 9090, "The metrics port") + "metricsPort", + runserver.DefaultMetricsPort, + "The metrics port") destinationEndpointHintKey = flag.String( "destinationEndpointHintKey", runserver.DefaultDestinationEndpointHintKey, @@ -93,28 +95,47 @@ var ( "refreshPrometheusMetricsInterval", runserver.DefaultRefreshPrometheusMetricsInterval, "interval to flush prometheus metrics") - logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity") + logVerbosity = flag.Int( + "v", + logging.DEFAULT, + "number for the log level verbosity") secureServing = flag.Bool( - "secureServing", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.") - healthChecking = flag.Bool("healthChecking", runserver.DefaultHealthChecking, "Enables health checking") - certPath = flag.String( - "certPath", "", "The path to the certificate for secure serving. The certificate and private key files "+ + "secureServing", + runserver.DefaultSecureServing, + "Enables secure serving. Defaults to true.") + healthChecking = flag.Bool( + "healthChecking", + runserver.DefaultHealthChecking, + "Enables health checking") + certPath = flag.String( + "certPath", + runserver.DefaultCertPath, + "The path to the certificate for secure serving. The certificate and private key files "+ "are assumed to be named tls.crt and tls.key, respectively. If not set, and secureServing is enabled, "+ "then a self-signed certificate is used.") // metric flags - totalQueuedRequestsMetric = flag.String("totalQueuedRequestsMetric", - "vllm:num_requests_waiting", + totalQueuedRequestsMetric = flag.String( + "totalQueuedRequestsMetric", + runserver.DefaultTotalQueuedRequestsMetric, "Prometheus metric for the number of queued requests.") - kvCacheUsagePercentageMetric = flag.String("kvCacheUsagePercentageMetric", - "vllm:gpu_cache_usage_perc", + kvCacheUsagePercentageMetric = flag.String( + "kvCacheUsagePercentageMetric", + runserver.DefaultKvCacheUsagePercentageMetric, "Prometheus metric for the fraction of KV-cache blocks currently in use (from 0 to 1).") // LoRA metrics - loraInfoMetric = flag.String("loraInfoMetric", - "vllm:lora_requests_info", + loraInfoMetric = flag.String( + "loraInfoMetric", + runserver.DefaultLoraInfoMetric, "Prometheus metric for the LoRA info metrics (must be in vLLM label format).") // configuration flags - configFile = flag.String("configFile", "", "The path to the configuration file") - configText = flag.String("configText", "", "The configuration specified as text, in lieu of a file") + configFile = flag.String( + "configFile", + runserver.DefaultConfigFile, + "The path to the configuration file") + configText = flag.String( + "configText", + runserver.DefaultConfigText, + "The configuration specified as text, in lieu of a file") setupLog = ctrl.Log.WithName("setup") @@ -405,7 +426,7 @@ func validateFlags() error { return fmt.Errorf("required %q flag not set", "poolName") } if *configText != "" && *configFile != "" { - return fmt.Errorf("both the %s and %s flags can not be set at the same time", "configText", "configFile") + return fmt.Errorf("both the %q and %q flags can not be set at the same time", "configText", "configFile") } return nil diff --git a/pkg/epp/server/runserver.go b/pkg/epp/server/runserver.go index 7b79ae90b..67dc78ede 100644 --- a/pkg/epp/server/runserver.go +++ b/pkg/epp/server/runserver.go @@ -20,7 +20,6 @@ import ( "context" "crypto/tls" "fmt" - "time" extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" @@ -63,6 +62,8 @@ type ExtProcServerRunner struct { // Default values for CLI flags in main const ( DefaultGrpcPort = 9002 // default for --grpcPort + DefaultGrpcHealthPort = 9003 // default for --grpcHealthPort + DefaultMetricsPort = 9090 // default for --metricsPort DefaultDestinationEndpointHintMetadataNamespace = "envoy.lb" // default for --destinationEndpointHintMetadataNamespace DefaultDestinationEndpointHintKey = "x-gateway-destination-endpoint" // default for --destinationEndpointHintKey DefaultPoolName = "" // required but no default @@ -71,6 +72,12 @@ const ( DefaultRefreshPrometheusMetricsInterval = 5 * time.Second // default for --refreshPrometheusMetricsInterval DefaultSecureServing = true // default for --secureServing DefaultHealthChecking = false // default for --healthChecking + DefaultTotalQueuedRequestsMetric = "vllm:num_requests_waiting" // default for --totalQueuedRequestsMetric + DefaultKvCacheUsagePercentageMetric = "vllm:gpu_cache_usage_perc" // default for --kvCacheUsagePercentageMetric + DefaultLoraInfoMetric = "vllm:lora_requests_info" // default for --loraInfoMetric + DefaultCertPath = "" // default for --certPath + DefaultConfigFile = "" // default for --configFile + DefaultConfigText = "" // default for --configText ) // NewDefaultExtProcServerRunner creates a runner with default values.