Commit 1cd981d

[V1][Metrics] Support vllm:cache_config_info (#13299)

1 parent fca2084

File tree: 5 files changed (+32 −12 lines)

tests/entrypoints/openai/test_metrics.py

Lines changed: 1 addition & 0 deletions

@@ -230,6 +230,7 @@ async def test_metrics_counts(server: RemoteOpenAIServer,
     "vllm:prompt_tokens_total",
     "vllm:generation_tokens_total",
     "vllm:iteration_tokens_total",
+    "vllm:cache_config_info",
     "vllm:request_success_total",
     "vllm:request_prompt_tokens_sum",
     "vllm:request_prompt_tokens_bucket",

vllm/config.py

Lines changed: 6 additions & 0 deletions

@@ -88,6 +88,12 @@ def compute_hash(self) -> str:
         ...


+class SupportsMetricsInfo(Protocol):
+
+    def metrics_info(self) -> Dict[str, str]:
+        ...
+
+
 class ModelImpl(str, enum.Enum):
     AUTO = "auto"
     VLLM = "vllm"

vllm/engine/metrics.py

Lines changed: 2 additions & 3 deletions

@@ -8,9 +8,8 @@
 import numpy as np
 import prometheus_client

-from vllm.config import VllmConfig
-from vllm.engine.metrics_types import (StatLoggerBase, Stats,
-                                       SupportsMetricsInfo)
+from vllm.config import SupportsMetricsInfo, VllmConfig
+from vllm.engine.metrics_types import StatLoggerBase, Stats
 from vllm.executor.ray_utils import ray
 from vllm.logger import init_logger

vllm/engine/metrics_types.py

Lines changed: 2 additions & 8 deletions

@@ -15,9 +15,9 @@
 import time
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Dict, List, Optional, Protocol
+from typing import List, Optional

-from vllm.config import VllmConfig
+from vllm.config import SupportsMetricsInfo, VllmConfig
 from vllm.spec_decode.metrics import SpecDecodeWorkerMetrics


@@ -70,12 +70,6 @@ class Stats:
     spec_decode_metrics: Optional["SpecDecodeWorkerMetrics"] = None


-class SupportsMetricsInfo(Protocol):
-
-    def metrics_info(self) -> Dict[str, str]:
-        ...
-
-
 class StatLoggerBase(ABC):
     """Base class for StatLogger."""

vllm/v1/metrics/loggers.py

Lines changed: 21 additions & 1 deletion

@@ -7,7 +7,7 @@
 import numpy as np
 import prometheus_client

-from vllm.config import VllmConfig
+from vllm.config import SupportsMetricsInfo, VllmConfig
 from vllm.logger import init_logger
 from vllm.v1.core.kv_cache_utils import PrefixCachingMetrics
 from vllm.v1.engine import FinishReason

@@ -228,6 +228,26 @@ def __init__(self, vllm_config: VllmConfig):
             buckets=request_latency_buckets,
             labelnames=labelnames).labels(*labelvalues)

+        self.log_metrics_info("cache_config", vllm_config.cache_config)
+
+    def log_metrics_info(self, type: str, config_obj: SupportsMetricsInfo):
+        metrics_info = config_obj.metrics_info()
+
+        name, documentation = None, None
+        if type == "cache_config":
+            name = "vllm:cache_config_info"
+            documentation = "Information of the LLMEngine CacheConfig"
+        assert name is not None, f"Unknown metrics info type {type}"
+
+        # Info type metrics are syntactic sugar for a gauge permanently set
+        # to 1. Since prometheus multiprocessing mode does not support Info,
+        # emulate info here with a gauge.
+        info_gauge = prometheus_client.Gauge(
+            name=name,
+            documentation=documentation,
+            labelnames=metrics_info.keys()).labels(**metrics_info)
+        info_gauge.set(1)
+
     def log(self, scheduler_stats: SchedulerStats,
             iteration_stats: IterationStats):
         """Log to prometheus."""
