|
7 | 7 | import numpy as np |
8 | 8 | import prometheus_client |
9 | 9 |
|
10 | | -from vllm.config import VllmConfig |
| 10 | +from vllm.config import SupportsMetricsInfo, VllmConfig |
11 | 11 | from vllm.logger import init_logger |
12 | 12 | from vllm.v1.core.kv_cache_utils import PrefixCachingMetrics |
13 | 13 | from vllm.v1.engine import FinishReason |
@@ -228,6 +228,26 @@ def __init__(self, vllm_config: VllmConfig): |
228 | 228 | buckets=request_latency_buckets, |
229 | 229 | labelnames=labelnames).labels(*labelvalues) |
230 | 230 |
|
| 231 | + self.log_metrics_info("cache_config", vllm_config.cache_config) |
| 232 | + |
def log_metrics_info(self, type: str, config_obj: SupportsMetricsInfo):
    """Export a config object's metrics info as a constant Prometheus gauge.

    The mapping returned by ``config_obj.metrics_info()`` supplies both the
    label names and the label values of the gauge, whose value is set to 1.

    Args:
        type: Kind of info being exported. Only ``"cache_config"`` is
            recognised.
        config_obj: Object whose ``metrics_info()`` result is used as the
            gauge labels.

    Raises:
        AssertionError: If ``type`` is not a recognised info type.
    """
    label_map = config_obj.metrics_info()

    # Resolve the metric name and help text for the requested info type;
    # both stay None when the type is not recognised.
    is_cache_config = type == "cache_config"
    metric_name = "vllm:cache_config_info" if is_cache_config else None
    help_text = ("Information of the LLMEngine CacheConfig"
                 if is_cache_config else None)
    assert metric_name is not None, f"Unknown metrics info type {type}"

    # An Info metric is just syntactic sugar for a gauge that is permanently
    # set to 1. Prometheus multiprocessing mode does not support Info, so it
    # is emulated here with a plain gauge.
    gauge = prometheus_client.Gauge(
        name=metric_name,
        documentation=help_text,
        labelnames=label_map.keys(),
    )
    labelled_gauge = gauge.labels(**label_map)
    labelled_gauge.set(1)
231 | 251 | def log(self, scheduler_stats: SchedulerStats, |
232 | 252 | iteration_stats: IterationStats): |
233 | 253 | """Log to prometheus.""" |
|
0 commit comments