@@ -46,6 +46,7 @@ type ExtProcServerRunner struct {
4646 GrpcPort int
4747 DestinationEndpointHintMetadataNamespace string
4848 DestinationEndpointHintKey string
49+ FairnessIDHeaderKey string
4950 PoolNamespacedName types.NamespacedName
5051 Datastore datastore.Datastore
5152 SecureServing bool
@@ -63,24 +64,25 @@ type ExtProcServerRunner struct {
6364
6465// Default values for CLI flags in main
6566const (
66- DefaultGrpcPort = 9002 // default for --grpc-port
67- DefaultGrpcHealthPort = 9003 // default for --grpc-health-port
68- DefaultMetricsPort = 9090 // default for --metrics-port
69- DefaultDestinationEndpointHintMetadataNamespace = "envoy.lb" // default for --destinationEndpointHintMetadataNamespace
70- DefaultDestinationEndpointHintKey = "x-gateway-destination-endpoint" // default for --destination-endpoint-hint-key
71- DefaultPoolName = "" // required but no default
72- DefaultPoolNamespace = "default" // default for --pool-namespace
73- DefaultRefreshMetricsInterval = 50 * time .Millisecond // default for --refresh-metrics-interval
74- DefaultRefreshPrometheusMetricsInterval = 5 * time .Second // default for --refresh-prometheus-metrics-interval
75- DefaultSecureServing = true // default for --secure-serving
76- DefaultHealthChecking = false // default for --health-checking
77- DefaultEnablePprof = true // default for --enable-pprof
78- DefaultTotalQueuedRequestsMetric = "vllm:num_requests_waiting" // default for --total-queued-requests-metric
79- DefaultKvCacheUsagePercentageMetric = "vllm:gpu_cache_usage_perc" // default for --kv-cache-usage-percentage-metric
80- DefaultLoraInfoMetric = "vllm:lora_requests_info" // default for --lora-info-metric
81- DefaultCertPath = "" // default for --cert-path
82- DefaultConfigFile = "" // default for --config-file
83- DefaultConfigText = "" // default for --config-text
67+ DefaultGrpcPort = 9002 // default for --grpc-port
68+ DefaultGrpcHealthPort = 9003 // default for --grpc-health-port
69+ DefaultMetricsPort = 9090 // default for --metrics-port
70+ DefaultDestinationEndpointHintMetadataNamespace = "envoy.lb" // default for --destinationEndpointHintMetadataNamespace
71+ DefaultDestinationEndpointHintKey = "x-gateway-destination-endpoint" // default for --destination-endpoint-hint-key
72+ DefaultFairnessIDHeaderKey = "x-gateway-inference-fairness-id" // default for --fairness-id-header-key
73+ DefaultPoolName = "" // required but no default
74+ DefaultPoolNamespace = "default" // default for --pool-namespace
75+ DefaultRefreshMetricsInterval = 50 * time .Millisecond // default for --refresh-metrics-interval
76+ DefaultRefreshPrometheusMetricsInterval = 5 * time .Second // default for --refresh-prometheus-metrics-interval
77+ DefaultSecureServing = true // default for --secure-serving
78+ DefaultHealthChecking = false // default for --health-checking
79+ DefaultEnablePprof = true // default for --enable-pprof
80+ DefaultTotalQueuedRequestsMetric = "vllm:num_requests_waiting" // default for --total-queued-requests-metric
81+ DefaultKvCacheUsagePercentageMetric = "vllm:gpu_cache_usage_perc" // default for --kv-cache-usage-percentage-metric
82+ DefaultLoraInfoMetric = "vllm:lora_requests_info" // default for --lora-info-metric
83+ DefaultCertPath = "" // default for --cert-path
84+ DefaultConfigFile = "" // default for --config-file
85+ DefaultConfigText = "" // default for --config-text
8486 DefaultMetricsStalenessThreshold = 2 * time .Second // NOTE(review): only entry in this group without a "default for --flag" note; which flag (if any) it backs is not visible here — confirm
8587)
8688
@@ -91,6 +93,7 @@ func NewDefaultExtProcServerRunner() *ExtProcServerRunner {
9193 GrpcPort : DefaultGrpcPort ,
9294 DestinationEndpointHintKey : DefaultDestinationEndpointHintKey ,
9395 DestinationEndpointHintMetadataNamespace : DefaultDestinationEndpointHintMetadataNamespace ,
96+ FairnessIDHeaderKey : DefaultFairnessIDHeaderKey ,
9497 PoolNamespacedName : types.NamespacedName {Name : DefaultPoolName , Namespace : DefaultPoolNamespace },
9598 SecureServing : DefaultSecureServing ,
9699 HealthChecking : DefaultHealthChecking ,
@@ -159,6 +162,7 @@ func (r *ExtProcServerRunner) AsRunnable(logger logr.Logger) manager.Runnable {
159162 extProcServer := handlers .NewStreamingServer (
160163 r .DestinationEndpointHintMetadataNamespace ,
161164 r .DestinationEndpointHintKey ,
165+ r .FairnessIDHeaderKey ,
162166 r .Datastore ,
163167 r .Director ,
164168 )
0 commit comments