From f3c5ca81ce91da1e0f368e6a73fb9a9eaca5a715 Mon Sep 17 00:00:00 2001 From: Fangping Shi Date: Fri, 17 Oct 2025 16:21:09 -0700 Subject: [PATCH 1/2] fix: kv cache panel in grafana.json Signed-off-by: Fangping Shi --- examples/online_serving/prometheus_grafana/grafana.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/online_serving/prometheus_grafana/grafana.json b/examples/online_serving/prometheus_grafana/grafana.json index 37abc9de926f..1c89d4593830 100644 --- a/examples/online_serving/prometheus_grafana/grafana.json +++ b/examples/online_serving/prometheus_grafana/grafana.json @@ -852,7 +852,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "vllm:gpu_cache_usage_perc{model_name=\"$model_name\"}", + "expr": "vllm:kv_cache_usage_perc{model_name=\"$model_name\"}", "instant": false, "legendFormat": "GPU Cache Usage", "range": true, From 8906e861eae3aa5c036c78df3aaa55bc6359a674 Mon Sep 17 00:00:00 2001 From: Fangping Shi Date: Mon, 20 Oct 2025 09:54:06 -0700 Subject: [PATCH 2/2] replace deprecated metrics under perses Signed-off-by: Fangping Shi --- .../dashboards/perses/performance_statistics.yaml | 2 +- .../dashboards/perses/query_statistics.yaml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/online_serving/dashboards/perses/performance_statistics.yaml b/examples/online_serving/dashboards/perses/performance_statistics.yaml index 2e8d24c3324b..8030fe2f00a9 100644 --- a/examples/online_serving/dashboards/perses/performance_statistics.yaml +++ b/examples/online_serving/dashboards/perses/performance_statistics.yaml @@ -530,7 +530,7 @@ spec: name: accelerators-thanos-querier-datasource # Multiply by 100 so we can read it as a percentage without setting a unit (avoids CUE unit conflicts) query: > - 100 * avg(vllm:gpu_cache_usage_perc) + 100 * avg(vllm:kv_cache_usage_perc) "18": kind: Panel diff --git a/examples/online_serving/dashboards/perses/query_statistics.yaml 
b/examples/online_serving/dashboards/perses/query_statistics.yaml index 28109aae8151..ad8e047f6dfe 100644 --- a/examples/online_serving/dashboards/perses/query_statistics.yaml +++ b/examples/online_serving/dashboards/perses/query_statistics.yaml @@ -98,7 +98,7 @@ spec: kind: PrometheusTimeSeriesQuery spec: datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource } - query: avg(vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) or vector(0) + query: avg(vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) or vector(0) minStep: "15s" core_running_ts: @@ -168,7 +168,7 @@ spec: spec: datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource } # multiply by 100 to present percentage; omit format.unit to avoid schema conflicts - query: (avg(vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0) + query: (avg(vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0) minStep: "15s" core_kv_usage_pct_ts: @@ -187,7 +187,7 @@ spec: kind: PrometheusTimeSeriesQuery spec: datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource } - query: (avg by (service) (vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0) + query: (avg by (service) (vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0) minStep: "15s" # --- Per-Pod breakdowns (works on Simulator & Real) --- @@ -246,7 +246,7 @@ spec: spec: datasource: { kind: PrometheusDatasource, name: accelerators-thanos-querier-datasource } # if your exporter labels kv metric with pod (the sim does), this works; otherwise it will just return empty - query: (avg by (pod) (vllm:gpu_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0) + query: (avg by (pod) (vllm:kv_cache_usage_perc{namespace="$NS",service="$SVC"}) * 100) or vector(0) minStep: "15s" # --- Real vLLM only (zeros on simulator) ---