diff --git a/helm-charts/assets/opea-metrics.png b/helm-charts/assets/opea-metrics.png new file mode 100644 index 000000000..efc174687 Binary files /dev/null and b/helm-charts/assets/opea-metrics.png differ diff --git a/helm-charts/assets/opea-scaling.png b/helm-charts/assets/opea-scaling.png new file mode 100644 index 000000000..d564cdb22 Binary files /dev/null and b/helm-charts/assets/opea-scaling.png differ diff --git a/helm-charts/chatqna/Chart.yaml b/helm-charts/chatqna/Chart.yaml index a23a36668..69ec61dca 100644 --- a/helm-charts/chatqna/Chart.yaml +++ b/helm-charts/chatqna/Chart.yaml @@ -6,6 +6,10 @@ name: chatqna description: The Helm chart to deploy ChatQnA type: application dependencies: + - name: dashboard + version: 0-latest + repository: "file://../common/dashboard" + condition: global.monitoring - name: tgi version: 0-latest alias: tgi-guardrails diff --git a/helm-charts/chatqna/hpa-values.yaml b/helm-charts/chatqna/hpa-values.yaml index d7613e2af..2e10952f9 100644 --- a/helm-charts/chatqna/hpa-values.yaml +++ b/helm-charts/chatqna/hpa-values.yaml @@ -9,6 +9,9 @@ # Default upstream configMap is in: # - https://github.com/kubernetes-sigs/prometheus-adapter/blob/master/deploy/manifests/config-map.yaml +dashboard: + scaling: true + autoscaling: enabled: true diff --git a/helm-charts/chatqna/values.yaml b/helm-charts/chatqna/values.yaml index a23ca2de6..c6834ee6d 100644 --- a/helm-charts/chatqna/values.yaml +++ b/helm-charts/chatqna/values.yaml @@ -115,6 +115,9 @@ chatqna-ui: tag: "latest" containerPort: "5173" +dashboard: + prefix: "OPEA ChatQnA" + global: http_proxy: "" https_proxy: "" @@ -129,8 +132,11 @@ global: # modelUseHostPath: /mnt/opea-models # modelUsePVC: model-volume - # Install Prometheus serviceMonitors for service components + # Prometheus monitoring + Grafana dashboard(s) for service components? 
monitoring: false + # Prometheus/Grafana namespace for Dashboard installation + prometheusNamespace: monitoring + # Prometheus Helm install release name needed for serviceMonitors prometheusRelease: prometheus-stack diff --git a/helm-charts/common/dashboard/Chart.yaml b/helm-charts/common/dashboard/Chart.yaml new file mode 100644 index 000000000..2385197cd --- /dev/null +++ b/helm-charts/common/dashboard/Chart.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +name: dashboard +description: Grafana dashboard(s) Helm chart for OPEA application(s) +type: application +version: 0-latest +appVersion: "1.0" diff --git a/helm-charts/common/dashboard/README.md b/helm-charts/common/dashboard/README.md new file mode 100644 index 000000000..451ba994a --- /dev/null +++ b/helm-charts/common/dashboard/README.md @@ -0,0 +1,27 @@ +# Dashboards + +Helm chart for installing Grafana dashboard(s) for OPEA application(s). + +## Installing the Chart + +After cluster [monitoring support](../../monitoring.md) with Prometheus + Grafana, +and some OPEA application(s) with monitoring enabled (`--set global.monitoring=true`) +have been [installed](../../README.md) to the cluster, Grafana dashboard(s) for +those OPEA application(s) can be installed with: + +```console +cd GenAIInfra/helm-charts/common +PROM_RELEASE=prometheus-stack # release name for Prometheus/Grafana Helm install +PROM_NAMESPACE=monitoring # namespace where they are installed +helm install opea-dashboard dashboard/ --set global.prometheusNamespace=$PROM_NAMESPACE --set global.prometheusRelease=$PROM_RELEASE +``` + +## Values + +| Key | Type | Default | Description | +| ------------------------- | ------ | ------------------ | ------------------------------------------------------------------------------------------------------- | +| prefix | string | `OPEA application` | Title prefix for the installed dashboards | +| metrics | bool | `true` | Whether to 
install metrics dashboard for the application services | +| scaling | bool | `false` | Whether to install scaling dashboard for the application services, use with [HPA](../../HPA.md) | +| global.prometheusNamespace | string | `monitoring` | Namespace where Prometheus/Grafana are installed, needed for Grafana to pick up installed dashboard | +| global.prometheusRelease | string | `prometheus-stack` | Release name for Prometheus/Grafana Helm install, needed for Grafana to pick up installed dashboard | diff --git a/helm-charts/common/dashboard/templates/_helpers.tpl b/helm-charts/common/dashboard/templates/_helpers.tpl new file mode 100644 index 000000000..5dd6d7775 --- /dev/null +++ b/helm-charts/common/dashboard/templates/_helpers.tpl @@ -0,0 +1,17 @@ +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "dashboard.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} diff --git a/helm-charts/common/dashboard/templates/configmap-metrics.yaml b/helm-charts/common/dashboard/templates/configmap-metrics.yaml new file mode 100644 index 000000000..9bf7edaa3 --- /dev/null +++ b/helm-charts/common/dashboard/templates/configmap-metrics.yaml @@ -0,0 +1,2068 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.metrics }} +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + grafana_dashboard: "1" + name: {{ include "dashboard.fullname" . 
}}-metrics + namespace: {{ .Values.global.prometheusNamespace }} +data: + {{ include "dashboard.fullname" . }}-metrics.json: | + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 46, + "panels": [], + "title": "Requests", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 0, + "y": 1 + }, + "id": 44, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": 
"single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(megaservice_request_latency_count{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "E2E", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(te_request_duration_count{service=\"$release-tei\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Embed", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(te_request_duration_count{service=\"$release-teirerank\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Rerank", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(tgi_request_duration_count{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "TGI", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(vllm:e2e_request_latency_seconds_count{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "vLLM", + "range": true, + "refId": "C" + } + ], + "title": "Rates", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, 
+ "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 5, + "y": 1 + }, + "id": 33, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(megaservice_request_latency_sum{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]) / rate(megaservice_request_latency_count{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "E2E", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(te_request_duration_sum{service=\"$release-tei\",namespace=\"$namespace\"}[$__rate_interval]) / rate(te_request_duration_count{service=\"$release-tei\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Embed", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": 
"avg(rate(te_request_duration_sum{service=\"$release-teirerank\",namespace=\"$namespace\"}[$__rate_interval]) / rate(te_request_duration_count{service=\"$release-teirerank\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Rerank", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(tgi_request_duration_sum{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval]) / rate(tgi_request_duration_count{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "TGI", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(vllm:e2e_request_latency_seconds_sum{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]) / rate(vllm:e2e_request_latency_seconds_count{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "vLLM", + "range": true, + "refId": "C" + } + ], + "title": "Latency averages", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 12, + "y": 1 + }, + "id": 40, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": false + }, + "rowsFrame": { + "layout": 
"auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by (le)(increase(megaservice_request_latency_bucket{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "format": "heatmap", + "hide": false, + "legendFormat": "{{ printf "{{le}}" }}", + "range": true, + "refId": "A" + } + ], + "title": "E2E Latency histogram", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 19, + "y": 1 + }, + "id": 52, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + 
"editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(megaservice_request_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(megaservice_request_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(megaservice_request_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "E2E Latency quantiles", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 47, + "panels": [], + "title": "First tokens", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": 
"off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 0, + "y": 9 + }, + "id": 42, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(megaservice_first_token_latency_count{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "E2E", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(vllm:time_to_first_token_seconds_count{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "vLLM", + "range": true, + "refId": "A" + } + ], + "title": "Rates", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": 
"off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 5, + "y": 9 + }, + "id": 36, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(megaservice_first_token_latency_sum{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]) / rate(megaservice_first_token_latency_count{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "E2E", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(vllm:time_to_first_token_seconds_sum{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]) / rate(vllm:time_to_first_token_seconds_count{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "vLLM", + "range": true, + "refId": "B" + } + ], + "title": "Latency averages", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 12, + "y": 9 + }, + "id": 41, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", 
+ "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": false + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by (le)(increase(megaservice_first_token_latency_bucket{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "format": "heatmap", + "hide": false, + "legendFormat": "{{ printf "{{le}}" }}", + "range": true, + "refId": "A" + } + ], + "title": "E2E Latency histogram", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 19, + "y": 9 + }, + "id": 50, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": 
"bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(megaservice_first_token_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(megaservice_first_token_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(megaservice_first_token_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "E2E Latency quantiles", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 48, + "panels": [], + "title": "Next tokens", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + 
"pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 0, + "y": 17 + }, + "id": 43, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(megaservice_inter_token_latency_count{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "E2E", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(tgi_request_mean_time_per_token_duration_count{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "TGI", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(rate(vllm:time_per_output_token_seconds_count{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "vLLM", + "range": true, + "refId": "B" + } + ], + "title": "Rates", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 5, + "y": 17 + }, + "id": 35, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(megaservice_inter_token_latency_sum{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]) / rate(megaservice_inter_token_latency_count{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "E2E", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(tgi_request_mean_time_per_token_duration_sum{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval]) / rate(tgi_request_mean_time_per_token_duration_count{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "TGI", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(vllm:time_per_output_token_seconds_sum{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]) / rate(vllm:time_per_output_token_seconds_count{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "vLLM", + "range": true, + "refId": "B" + } + ], + "title": "Latency averages", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 12, + "y": 17 + }, + "id": 45, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Spectral", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": false + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by (le)(increase(megaservice_inter_token_latency_bucket{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "format": "heatmap", + "hide": false, + "legendFormat": "{{ printf "{{le}}" }}", + "range": true, + "refId": "A" + } + ], + "title": "E2E Latency histogram", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + 
"color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 19, + "y": 17 + }, + "id": 51, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum by (le) (rate(megaservice_inter_token_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.9, sum by (le) (rate(megaservice_inter_token_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p90", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": 
"histogram_quantile(0.99, sum by (le) (rate(megaservice_inter_token_latency_bucket{namespace=\"$namespace\",service=\"$release\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "E2E Latency quantiles", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 49, + "panels": [], + "title": "Inference services", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 0, + "y": 25 + }, + "id": 22, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-tgi\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "TGI: instances", + "range": 
true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(tgi_queue_size{service=\"$release-tgi\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "TGI: used", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-teirerank\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "Rerank: instances", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(te_queue_size{service=\"$release-teirerank\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "Rerank: used", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-tei\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "Embed: instances", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(te_queue_size{service=\"$release-tei\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "Embed: used", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "vLLM: instances", + "range": true, + "refId": "G" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(vllm:cache_config_info{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "vLLM: used", + "range": true, + "refId": "H" + } + ], + "title": "Replicas", + "type": 
"timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 5, + "y": 25 + }, + "id": 28, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(tgi_queue_size{service=\"$release-tgi\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "TGI", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(te_queue_size{service=\"$release-tei\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "Embed", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(te_queue_size{service=\"$release-teirerank\",namespace=\"$namespace\"})", + "hide": false, + 
"legendFormat": "Rerank", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(vllm:num_requests_running{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "vLLM (running)", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(vllm:num_requests_waiting{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "vLLM (waiting)", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(vllm:num_requests_swapped{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "vLLM (swapped to CPU)", + "range": true, + "refId": "F" + } + ], + "title": "Queue totals", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + 
"unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 12, + "y": 25 + }, + "id": 34, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "avg(rate(megaservice_inter_token_latency_count{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]) / rate(megaservice_request_latency_count{service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "E2E", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "(sum by (service)(rate(vllm:generation_tokens_total{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval]))) / (sum by (service)(rate(vllm:request_success_total{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "vLLM", + "range": true, + "refId": "B" + } + ], + "title": "Tokens / reply (average)", + "type": "timeseries" + } + ], + "refresh": false, + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "prometheus" + }, + "description": "Prometheus instance", + "hide": 0, + "includeAll": false, + "label": "", + "multi": false, + "name": "Metrics", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "definition": "label_values(megaservice_first_token_latency_count,namespace)", + "hide": 0, + "includeAll": 
false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(megaservice_first_token_latency_count,namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "chatqna", + "value": "chatqna" + }, + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "definition": "label_values(megaservice_first_token_latency_count{namespace=\"$namespace\", service!~\".*-uservice\", service!~\".*-usvc\"},service)", + "description": "Helm release name used as prefix for the services", + "hide": 0, + "includeAll": false, + "label": "Helm release", + "multi": false, + "name": "release", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(megaservice_first_token_latency_count{namespace=\"$namespace\", service!~\".*-uservice\", service!~\".*-usvc\"},service)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "{{ .Values.prefix }} metrics", + "uid": "opea-{{ include "dashboard.fullname" . }}-metrics", + "version": 1, + "weekStart": "" + } +{{- end }} diff --git a/helm-charts/common/dashboard/templates/configmap-scaling.yaml b/helm-charts/common/dashboard/templates/configmap-scaling.yaml new file mode 100644 index 000000000..a0aa4b8f7 --- /dev/null +++ b/helm-charts/common/dashboard/templates/configmap-scaling.yaml @@ -0,0 +1,2307 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.scaling }} +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + grafana_dashboard: "1" + name: {{ include "dashboard.fullname" . 
}}-scaling + namespace: {{ .Values.global.prometheusNamespace }} +data: + {{ include "dashboard.fullname" . }}-scaling.json: | + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 7, + "panels": [], + "title": "Scaling", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": 
"none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "Instances", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(sum by (pod)(http_requests_total{method=\"POST\",service=\"$release\",namespace=\"$namespace\"}))", + "hide": false, + "legendFormat": "Used", + "range": true, + "refId": "C" + } + ], + "title": "Megaservice", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": 
"code", + "expr": "count(up{service=\"$release-teirerank\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "TEI instances", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(te_queue_size{service=\"$release-teirerank\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "TEI used", + "range": true, + "refId": "B" + } + ], + "title": "Rerank", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 18, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-retriever-usvc\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": 
"Instances", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(sum by (pod)(http_requests_total{method=\"POST\",service=\"$release-retriever-usvc\",namespace=\"$namespace\"}))", + "hide": false, + "legendFormat": "Used", + "range": true, + "refId": "C" + } + ], + "title": "Retrieve", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-tei\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "TEI instances", + "range": true, + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${Metrics}" + }, + "editorMode": "code", + "expr": "count(te_queue_size{service=\"$release-tei\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "TEI used", + "range": true, + "refId": "D" + } + ], + "title": "Embed", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 21, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-llm-uservice\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "Instances", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(sum by 
(pod)(http_requests_total{method=\"POST\",service=\"$release-llm-uservice\",namespace=\"$namespace\"}))", + "hide": false, + "legendFormat": "Used", + "range": true, + "refId": "C" + } + ], + "title": "LLM-uservice", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-tgi\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "TGI instances", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(tgi_queue_size{service=\"$release-tgi\",namespace=\"$namespace\"})", + "hide": false, + "legendFormat": "TGI 
used", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(up{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "vLLM instances", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "count(vllm:num_requests_waiting{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "vLLM used", + "range": true, + "refId": "D" + } + ], + "title": "LLM", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 6, + "panels": [], + "title": "Completed requests", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 8 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + 
"placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "max(sum by(pod)(rate(http_requests_total{method=\"POST\",status=\"2xx\",service=\"$release\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Most", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "min(sum by(pod)(rate(http_requests_total{method=\"POST\",status=\"2xx\",service=\"$release\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Least", + "range": true, + "refId": "C" + } + ], + "title": "Megaservice", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 4, + "y": 8 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ + 
"min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "max(sum by(pod)(rate(te_request_count{service=\"$release-teirerank\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Most", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "min(sum by(pod)(rate(te_request_count{service=\"$release-teirerank\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Least", + "range": true, + "refId": "C" + } + ], + "title": "Rerank", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 8, + "y": 8 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ + "min", + 
"max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "max(sum by(pod)(rate(http_requests_total{method=\"POST\",status=\"2xx\",service=\"$release-retriever-usvc\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Most", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "min(sum by(pod)(rate(http_requests_total{method=\"POST\",status=\"2xx\",service=\"$release-retriever-usvc\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Least", + "range": true, + "refId": "C" + } + ], + "title": "Retrieve", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 12, + "y": 8 
+ }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "max(sum by(pod)(rate(te_request_count{service=\"$release-tei\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Most", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "min(sum by(pod)(rate(te_request_count{service=\"$release-tei\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Least", + "range": true, + "refId": "C" + } + ], + "title": "Embed", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 16, + "y": 8 + }, + "id": 22, + 
"options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "max(sum by(pod)(rate(http_requests_total{method=\"POST\",status=\"2xx\",service=\"$release-llm-uservice\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Most", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "min(sum by(pod)(rate(http_requests_total{method=\"POST\",status=\"2xx\",service=\"$release-llm-uservice\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Least", + "range": true, + "refId": "C" + } + ], + "title": "LLM-uservice", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 8 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "max(sum by(pod)(rate(tgi_request_count{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Most", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "min(sum by(pod)(rate(tgi_request_count{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Least", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "max(sum by(pod)(rate(vllm:request_success_total{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Most", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "min(sum by(pod)(rate(vllm:request_success_total{service=\"$release-vllm\",namespace=\"$namespace\"}[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "Least", + "range": true, + "refId": "D" + } + ], + "title": "LLM", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 12, + "panels": [], + "title": "Incomplete requests", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { 
+ "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "none" + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 16 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (service)(megaservice_request_pending{service=\"$release\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "Pending total", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(rate(http_requests_total{method=\"POST\",status!=\"2xx\",service=\"$release\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Failed total", + "range": true, + "refId": "B" + } + ], + "title": "Megaservice: fail rate + pending 
count", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 16 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(rate(te_request_count{service=\"$release-teirerank\",namespace=\"$namespace\"}[$__rate_interval])-rate(te_request_success{service=\"$release-teirerank\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "B" + } + ], + "title": "Rerank", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": 
false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "none" + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 16 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(rate(http_requests_total{method=\"POST\",status!=\"2xx\",service=\"$release-retriever-usvc\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Failed total", + "range": true, + "refId": "B" + } + ], + "title": "Retrieve", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": 
"auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 16 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(rate(te_request_count{service=\"$release-tei\",namespace=\"$namespace\"}[$__rate_interval])-rate(te_request_success{service=\"$release-tei\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "B" + } + ], + "title": "Embed", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + 
"pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "none" + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 16 + }, + "id": 23, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(rate(http_requests_total{method=\"POST\",status!=\"2xx\",service=\"$release-llm-uservice\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Failed total", + "range": true, + "refId": "B" + } + ], + "title": "LLM-uservice", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 16 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(rate(tgi_request_count{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval])-rate(tgi_request_success{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum by (err)(rate(tgi_request_failure{service=\"$release-tgi\",namespace=\"$namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "{{ printf "{{err}}" }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(vllm:num_requests_running{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "Running", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(vllm:num_requests_waiting{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "Waiting", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "${Metrics}" + }, + "editorMode": "code", + "expr": "sum(vllm:num_requests_swapped{service=\"$release-vllm\",namespace=\"$namespace\"})", + "hide": false, + "instant": false, + "legendFormat": "Swapped to CPU", + "range": true, + "refId": "E" + } + ], + "title": "LLM", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "prometheus" + }, + "description": "Prometheus instance", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "Metrics", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "definition": "label_values(megaservice_request_pending,namespace)", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(megaservice_request_pending,namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "chatqna", + "value": "chatqna" + }, + "datasource": { + "type": "prometheus", + "uid": "${Metrics}" + }, + "definition": "label_values(megaservice_request_pending{namespace=\"$namespace\", service!~\".*-uservice\", service!~\".*-usvc\"},service)", + "description": "Helm release name used as prefix for the services", + "hide": 0, + "includeAll": false, + "label": "Helm release", + "multi": false, + "name": "release", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(megaservice_request_pending{namespace=\"$namespace\", service!~\".*-uservice\", 
service!~\".*-usvc\"},service)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "{{ .Values.prefix }} scaling", + "uid": "opea-{{ include "dashboard.fullname" . }}-scaling", + "version": 1, + "weekStart": "" + } +{{- end }} diff --git a/helm-charts/common/dashboard/values.yaml b/helm-charts/common/dashboard/values.yaml new file mode 100644 index 000000000..55df332d3 --- /dev/null +++ b/helm-charts/common/dashboard/values.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# dashboard to install +metrics: true +scaling: false + +# title prefix for dashboard +prefix: "OPEA application" + +global: + # Prometheus Helm install release name + prometheusRelease: prometheus-stack + # Namespace where Prometheus/Grafana are installed + prometheusNamespace: monitoring diff --git a/helm-charts/docsum/Chart.yaml b/helm-charts/docsum/Chart.yaml index 8b963c502..4d02035f8 100644 --- a/helm-charts/docsum/Chart.yaml +++ b/helm-charts/docsum/Chart.yaml @@ -6,6 +6,10 @@ name: docsum description: The Helm chart to deploy DocSum type: application dependencies: + - name: dashboard + version: 0-latest + repository: "file://../common/dashboard" + condition: global.monitoring - name: tgi version: 0-latest repository: "file://../common/tgi" diff --git a/helm-charts/docsum/values.yaml b/helm-charts/docsum/values.yaml index 56509b309..a122df98b 100644 --- a/helm-charts/docsum/values.yaml +++ b/helm-charts/docsum/values.yaml @@ -100,6 +100,9 @@ docsum-ui: # service: # type: ClusterIP +dashboard: + prefix: "OPEA DocSum" + global: http_proxy: "" https_proxy: "" @@ -114,8 +117,11 @@ global: # modelUseHostPath: /mnt/opea-models # modelUsePVC: 
model-volume - # Install Prometheus serviceMonitors for service components + # Prometheus monitoring + Grafana dashboard(s) for service components? monitoring: false + # Prometheus/Grafana namespace for Dashboard installation + prometheusNamespace: monitoring + # Prometheus Helm install release name needed for serviceMonitors prometheusRelease: prometheus-stack diff --git a/helm-charts/monitoring.md b/helm-charts/monitoring.md index f4bc21ed0..af8a56f2a 100644 --- a/helm-charts/monitoring.md +++ b/helm-charts/monitoring.md @@ -1,34 +1,38 @@ -# Monitoring support +# Observability for OPEA Workloads in Kubernetes ## Table of Contents - [Introduction](#introduction) - [Pre-conditions](#pre-conditions) - - [Prometheus install](#prometheus-install) - - [Helm options](#helm-options) + - [Prometheus + Grafana install](#prometheus--grafana-install) + - [OPEA Helm options](#opea-helm-options) - [Install](#install) + - [Monitoring support + Grafana access](#monitoring-support--grafana-access) + - [Dashboards](#dashboards) - [Verify](#verify) +- [Dashboards](#dashboards) ## Introduction -Monitoring provides service component usage metrics for [Prometheus](https://prometheus.io/), -which can be visualized e.g. in [Grafana](https://grafana.com/). +Helm chart `monitoring` option enables observability support for the OPEA workloads; +[Prometheus](https://prometheus.io/) metrics for the service components, +and [Grafana](https://grafana.com/) visualization for them. Scaling the services automatically based on their usage with [HPA](HPA.md) also relies on these metrics. -[Observability documentation](../kubernetes-addons/Observability/README.md) +[Metrics / visualization add-ons](../kubernetes-addons/Observability/README.md) explains how to install additional monitoring for node and device metrics, and Grafana for visualizing those metrics. 
## Pre-conditions -### Prometheus install +### Prometheus + Grafana install If cluster does not run [Prometheus operator](https://github.com/prometheus-operator/kube-prometheus) yet, it SHOULD be be installed before enabling monitoring, e.g. by using a Helm chart for it: https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack -To install (older version) of Prometheus: +To install an (older, 55.x) version of Prometheus & Grafana: ```console $ helm repo add prometheus-community https://prometheus-community.github.io/helm-charts @@ -38,16 +42,51 @@ $ kubectl create ns $prom_ns $ helm install prometheus-stack prometheus-community/kube-prometheus-stack --version 55.5.2 -n $prom_ns ``` -### Helm options +### OPEA Helm options -If Prometheus is installed under some other release name than `prometheus-stack`, +If Prometheus & Grafana are installed under some other release name than `prometheus-stack`, provide that as `global.prometheusRelease` value for the OPEA service Helm install, or in its `values.yaml` file. Otherwise Prometheus ignores the installed `serviceMonitor` objects. ## Install -Install Helm chart with `--set global.monitoring=true` option. +### Monitoring support + Grafana access + +Install the (e.g. ChatQnA) Helm chart with the `--set global.monitoring=true` option. + +Use port-forward to access Grafana: + +``` +kubectl port-forward service/grafana 3000:80 +``` + +Open your browser and navigate to http://localhost:3000. Use +"admin/prom-operator" as the username and the password to log in. 
+ +### Dashboards + +Currently, when the `monitoring` option is enabled for the ChatQnA and DocSum +Helm charts, an OPEA application monitoring dashboard is also installed: + +![Metrics dashboard](./assets/opea-metrics.png) + +When [HPA scaling](HPA.md) is enabled, an additional application scaling +dashboard is installed: + +![Scaling dashboard](./assets/opea-scaling.png) + +For other applications, if they were installed with the `monitoring` +option enabled, dashboard(s) for monitoring them can be installed +afterwards, with: + +``` +$ helm install dashboards common/dashboard/ --set global.monitoring=true +``` + +NOTE: dashboards will list available applications and their metrics +only after they've processed their first token, because related +metrics are not available before that! ## Verify diff --git a/kubernetes-addons/Observability/README.md b/kubernetes-addons/Observability/README.md index 4f53cfccd..022e768b3 100644 --- a/kubernetes-addons/Observability/README.md +++ b/kubernetes-addons/Observability/README.md @@ -1,46 +1,20 @@ -# How-To Setup Observability for OPEA Workload in Kubernetes +# Metrics / visualization add-ons -This guide provides a step-by-step approach to setting up observability for the OPEA workload in a Kubernetes environment. We will cover the setup of Prometheus and Grafana, as well as the collection of metrics for Gaudi hardware, OPEA/chatqna including TGI, TEI-Embedding, TEI-Reranking and other microservices, and PCM. +Table of Contents -For monitoring Helm installed OPEA applications, see [Helm monitoring option](../../helm-charts/monitoring.md). 
+- [Pre-conditions](#pre-conditions) +- [Device metrics for Gaudi HW](#device-metrics-for-gaudi-hw) +- [Extra metrics for OPEA applications](#extra-metrics-for-opea-applications) +- [CPU metrics from PCM](#cpu-metrics-from-pcm) +- [Importing dashboards to Grafana](#importing-dashboards-to-grafana) +- [More dashboards](#more-dashboards) -## Prepare +## Pre-conditions -``` -git clone https://github.com/opea-project/GenAIInfra.git -cd kubernetes-addons/Observability -``` - -## 1. Setup Prometheus & Grafana - -Setting up Prometheus and Grafana is essential for monitoring and visualizing your workloads. Follow these steps to get started: - -### Step 1: Install Prometheus&Grafana - -``` -kubectl create ns monitoring -helm repo add prometheus-community https://prometheus-community.github.io/helm-charts -helm repo update -helm install prometheus-stack prometheus-community/kube-prometheus-stack --version 55.5.1 -n monitoring -``` - -### Step 2: Verify the installation - -``` -kubectl get pods -n monitoring -``` - -### Step 3: Port-forward to access Grafana - -``` -kubectl port-forward service/grafana 3000:80 -``` - -### Step 4: Access Grafana - -Open your browser and navigate to http://localhost:3000. Use "admin/prom-operator" as the username and the password to login. +Monitoring for Helm installed OPEA applications is already working, +see [Helm monitoring option](../../helm-charts/monitoring.md). -## 2. Metrics for Gaudi Hardware (v1.16.2) +## Device metrics for Gaudi HW To monitor Gaudi hardware metrics, you can use the following steps: @@ -93,52 +67,25 @@ promhttp_metric_handler_requests_total{code="503"} 0 ### Step 5: Import the dashboard into Grafana -Manually import the [`Dashboard-Gaudi-HW.json`](./habana/Dashboard-Gaudi-HW.json) file into Grafana +Import the [`Dashboard-Gaudi-HW.json`](./habana/Dashboard-Gaudi-HW.json) file into Grafana ![Gaudi HW dashboard](./assets/habana.png) -## 3. 
Metrics for OPEA applications - -To monitor OPEA application metrics including TGI-gaudi, TEI, TEI-Reranking and other micro services, you can use the following steps: - -### Step 1: Install application with Helm - -Install Helm (version >= 3.15) first. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. - -Install OPEA application as described in [Helm charts README](../../helm-charts/README.md). - -For example, to install ChatQnA, follow [ChatQnA helm chart](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts/chatqna/README.md) for instructions on deploying it to Kubernetes. - -Make sure to enable [Helm monitoring option](../../helm-charts/monitoring.md). - -### Step 2: Install dashboards - -Here are few Grafana dashboards for monitoring different aspects of OPEA applications: - -- [`queue_size_embedding_rerank_tgi.json`](./chatqna/dashboard/queue_size_embedding_rerank_tgi.json): queue size of TGI-gaudi, TEI-Embedding, TEI-reranking -- [`tgi_grafana.json`](./chatqna/dashboard/tgi_grafana.json): `tgi-gaudi` text generation inferencing service utilization -- [`opea-scaling.json`](./opea-apps/opea-scaling.json): scaling, request rates and failures for OPEA application megaservice, TEI-reranking, TEI-embedding, and TGI - -You can either: +## Extra metrics for OPEA applications -- Import them manually to Grafana, -- Use [`update-dashboards.sh`](./update-dashboards.sh) script to add them to Kubernetes as (more persistent) Grafana dashboard `configMap`s - - Script uses `$USER-` as dashboard `configMap` names, and overwrites any pre-existing `configMap` with the same name -- Or create your own dashboards based on them - -When dashboard is imported to Grafana, you can directly save changes to it, but such dashboards go away if Grafana is removed / re-installed. 
When dashboard is in `configMap`, Grafana saves its changes to a (selected) file, but you need to re-apply those files to Kubernetes with the script, for your changes to be there when that Grafana dashboard page is reloaded in browser. +Here are a few Grafana dashboards for monitoring additional aspects of OPEA applications: -Gotchas for dashboard `configMap` script usage: +- [`queue_size_embedding_rerank_tgi.json`](./chatqna/queue_size_embedding_rerank_tgi.json): queue size of TGI-gaudi, TEI-Embedding, TEI-reranking +- [`tgi_grafana.json`](./chatqna/tgi_grafana.json): `tgi-gaudi` text generation inferencing service utilization -- If you change dashboard file name, you need to change also its 'uid' field (at end of the file), otherwise Grafana will see multiple `configMap`s for the same dashboard ID -- If there's no `uid` specified for the dashboard, Grafana will generate one on `configMap` load. Meaning that dashboard ID, and Grafana URL to it, will change on every reload -- Script assumes default Prometheus / Grafana install (`monitoring` namespace, `grafana_dasboard=1` label identifying dashboard `configMap`s) +These dashboards can be imported to Grafana. NOTE: Services provide metrics only after they have processed at least one query, before that dashboards can be empty! ![TGI dashboard](./assets/tgi.png) -![Scaling dashboard](./assets/opea-scaling.png) -## 4. 
Metrics for PCM (Intel® Performance Counter Monitor) +## CPU metrics from PCM + +To monitor PCM (Intel® Performance Counter Monitor) metrics, you can use the following steps: ### Step 1: Install PCM @@ -160,9 +107,26 @@ kubectl apply -f pcm/pcm-serviceMonitor.yaml ### Step 4: Install the PCM dashboard -manually import the [`pcm-dashboard.json`](./pcm/pcm-dashboard.json) file into the Grafana +Import the [`pcm-dashboard.json`](./pcm/pcm-dashboard.json) file into Grafana ![PCM dashboard](./assets/pcm.png) +## Importing dashboards to Grafana + +You can either: + +- Import them manually to Grafana, +- Use [`update-dashboards.sh`](./update-dashboards.sh) script to add them to Kubernetes as (more persistent) Grafana dashboard `configMap`s + - Script uses `$USER-` as dashboard `configMap` names, and overwrites any pre-existing `configMap` with the same name +- Or create your own dashboards based on them + +When dashboard is imported to Grafana, you can directly save changes to it, but such dashboards go away if Grafana is removed / re-installed. When dashboard is in `configMap`, Grafana saves its changes to a (selected) file, but you need to re-apply those files to Kubernetes with the script, for your changes to be there when that Grafana dashboard page is reloaded in browser. + +Gotchas for dashboard `configMap` script usage: + +- If you change dashboard file name, you need to change also its 'uid' field (at end of the file), otherwise Grafana will see multiple `configMap`s for the same dashboard ID +- If there's no `uid` specified for the dashboard, Grafana will generate one on `configMap` load. Meaning that dashboard ID, and Grafana URL to it, will change on every reload +- Script assumes default Prometheus / Grafana install (`monitoring` namespace, `grafana_dashboard=1` label identifying dashboard `configMap`s) + ## More dashboards GenAIEval repository includes additional [dashboards](https://github.com/opea-project/GenAIEval/tree/main/evals/benchmark/grafana). 
diff --git a/kubernetes-addons/Observability/assets/opea-scaling.png b/kubernetes-addons/Observability/assets/opea-scaling.png deleted file mode 100644 index d2a3753a2..000000000 Binary files a/kubernetes-addons/Observability/assets/opea-scaling.png and /dev/null differ diff --git a/kubernetes-addons/Observability/chatqna/dashboard/queue_size_embedding_rerank_tgi.json b/kubernetes-addons/Observability/chatqna/queue_size_embedding_rerank_tgi.json similarity index 100% rename from kubernetes-addons/Observability/chatqna/dashboard/queue_size_embedding_rerank_tgi.json rename to kubernetes-addons/Observability/chatqna/queue_size_embedding_rerank_tgi.json diff --git a/kubernetes-addons/Observability/chatqna/dashboard/tgi_grafana.json b/kubernetes-addons/Observability/chatqna/tgi_grafana.json similarity index 100% rename from kubernetes-addons/Observability/chatqna/dashboard/tgi_grafana.json rename to kubernetes-addons/Observability/chatqna/tgi_grafana.json