
Commit 53e39cc (parent: 367cb8c)

[V1][Metrics] Support vllm:cache_config_info
prometheus_client supports Info metrics, which are equivalent to a Gauge whose value is permanently set to 1 but which exposes interesting key/value pair information via labels. This is used for information about an instance that does not change - so it only needs to be observed at startup - and it allows comparing across instances in Prometheus. We use this concept for the vllm:cache_config_info metric:

```
vllm:cache_config_info{block_size="16",cache_dtype="auto",calculate_kv_scales="False",cpu_offload_gb="0",enable_prefix_caching="False",gpu_memory_utilization="0.9",...} 1.0
```

However, prometheus_client has never supported Info metrics in multiprocessing mode - for unclear reasons. We simply use a Gauge metric set to 1 with multiprocess_mode="mostrecent" instead.

Signed-off-by: Mark McLoughlin <[email protected]>
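As background, here is a minimal standalone sketch contrasting the two approaches (the metric and label names are demo values, not vLLM's; multiprocess_mode="mostrecent" needs a recent prometheus_client and only takes effect when PROMETHEUS_MULTIPROC_DIR is set):

```python
import prometheus_client

# Native Info metric (single-process only). The client renders it as a
# gauge named <name>_info, permanently set to 1, with the key/value
# pairs attached as labels.
cache_info = prometheus_client.Info(
    "demo_cache_config", "Demo cache configuration")
cache_info.info({"block_size": "16", "cache_dtype": "auto"})

# Multiprocess-friendly emulation: a plain Gauge pinned to 1 with the
# same key/value pairs as labels. When worker-process samples are
# merged, "mostrecent" keeps the last value written (always 1 here).
pairs = {"block_size": "16", "cache_dtype": "auto"}
info_gauge = prometheus_client.Gauge(
    "demo_cache_config_emulated_info", "Demo cache configuration",
    labelnames=list(pairs.keys()),
    multiprocess_mode="mostrecent")
info_gauge.labels(**pairs).set(1)
```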

5 files changed: +32 −12 lines changed

tests/entrypoints/openai/test_metrics.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -230,6 +230,7 @@ async def test_metrics_counts(server: RemoteOpenAIServer,
     "vllm:prompt_tokens_total",
     "vllm:generation_tokens_total",
     "vllm:iteration_tokens_total",
+    "vllm:cache_config_info",
     "vllm:request_success_total",
     "vllm:request_prompt_tokens_sum",
     "vllm:request_prompt_tokens_bucket",
```

vllm/config.py

Lines changed: 6 additions & 0 deletions
```diff
@@ -84,6 +84,12 @@ def compute_hash(self) -> str:
         ...
 
 
+class SupportsMetricsInfo(Protocol):
+
+    def metrics_info(self) -> Dict[str, str]:
+        ...
+
+
 class ModelImpl(str, enum.Enum):
     AUTO = "auto"
     VLLM = "vllm"
```

vllm/engine/metrics.py

Lines changed: 2 additions & 3 deletions
```diff
@@ -8,9 +8,8 @@
 import numpy as np
 import prometheus_client
 
-from vllm.config import VllmConfig
-from vllm.engine.metrics_types import (StatLoggerBase, Stats,
-                                       SupportsMetricsInfo)
+from vllm.config import SupportsMetricsInfo, VllmConfig
+from vllm.engine.metrics_types import StatLoggerBase, Stats
 from vllm.executor.ray_utils import ray
 from vllm.logger import init_logger
 
```

vllm/engine/metrics_types.py

Lines changed: 2 additions & 8 deletions
```diff
@@ -15,9 +15,9 @@
 import time
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Dict, List, Optional, Protocol
+from typing import List, Optional
 
-from vllm.config import VllmConfig
+from vllm.config import SupportsMetricsInfo, VllmConfig
 from vllm.spec_decode.metrics import SpecDecodeWorkerMetrics
 
 
@@ -70,12 +70,6 @@ class Stats:
     spec_decode_metrics: Optional["SpecDecodeWorkerMetrics"] = None
 
 
-class SupportsMetricsInfo(Protocol):
-
-    def metrics_info(self) -> Dict[str, str]:
-        ...
-
-
 class StatLoggerBase(ABC):
     """Base class for StatLogger."""
 
```

vllm/v1/metrics/loggers.py

Lines changed: 21 additions & 1 deletion
```diff
@@ -7,7 +7,7 @@
 import numpy as np
 import prometheus_client
 
-from vllm.config import VllmConfig
+from vllm.config import SupportsMetricsInfo, VllmConfig
 from vllm.logger import init_logger
 from vllm.v1.core.kv_cache_utils import PrefixCachingMetrics
 from vllm.v1.engine import FinishReason
@@ -228,6 +228,26 @@ def __init__(self, vllm_config: VllmConfig):
                 buckets=request_latency_buckets,
                 labelnames=labelnames).labels(*labelvalues)
 
+        self.log_metrics_info("cache_config", vllm_config.cache_config)
+
+    def log_metrics_info(self, type: str, config_obj: SupportsMetricsInfo):
+        metrics_info = config_obj.metrics_info()
+
+        name, documentation = None, None
+        if type == "cache_config":
+            name = "vllm:cache_config_info"
+            documentation = "Information of the LLMEngine CacheConfig"
+        assert name is not None, f"Unknown metrics info type {type}"
+
+        # Info type metrics are syntactic sugar for a gauge permanently set to 1
+        # Since prometheus multiprocessing mode does not support Info, emulate
+        # info here with a gauge.
+        info_gauge = prometheus_client.Gauge(
+            name=name,
+            documentation=documentation,
+            labelnames=metrics_info.keys()).labels(**metrics_info)
+        info_gauge.set(1)
+
     def log(self, scheduler_stats: SchedulerStats,
             iteration_stats: IterationStats):
         """Log to prometheus."""
```
