diff --git a/crates/blockchain/metrics/rpc.rs b/crates/blockchain/metrics/rpc.rs index d8b6227c8c1..89181a07fb8 100644 --- a/crates/blockchain/metrics/rpc.rs +++ b/crates/blockchain/metrics/rpc.rs @@ -14,7 +14,7 @@ fn initialize_rpc_outcomes_counter() -> CounterVec { register_counter_vec!( "rpc_requests_total", "Total number of RPC requests partitioned by namespace, method, and outcome", - &["namespace", "method", "outcome"], + &["namespace", "method", "outcome", "error_kind"], ) .unwrap() } @@ -29,24 +29,31 @@ fn initialize_rpc_duration_histogram() -> HistogramVec { } /// Represents the outcome of an RPC request when recording metrics. -#[derive(Clone, Copy)] +#[derive(Clone)] pub enum RpcOutcome { Success, - Error, + Error(String), } impl RpcOutcome { fn as_label(&self) -> &'static str { match self { RpcOutcome::Success => "success", - RpcOutcome::Error => "error", + RpcOutcome::Error(_) => "error", + } + } + + fn error_kind(&self) -> &str { + match self { + RpcOutcome::Success => "", + RpcOutcome::Error(kind) => kind, } } } pub fn record_rpc_outcome(namespace: &str, method: &str, outcome: RpcOutcome) { METRICS_RPC_REQUEST_OUTCOMES - .with_label_values(&[namespace, method, outcome.as_label()]) + .with_label_values(&[namespace, method, outcome.as_label(), outcome.error_kind()]) .inc(); } diff --git a/crates/networking/rpc/rpc.rs b/crates/networking/rpc/rpc.rs index 438e67edab2..73639425503 100644 --- a/crates/networking/rpc/rpc.rs +++ b/crates/networking/rpc/rpc.rs @@ -206,10 +206,9 @@ pub trait RpcHandler: Sized { ) .await; - let outcome = if result.is_ok() { - RpcOutcome::Success - } else { - RpcOutcome::Error + let outcome = match &result { + Ok(_) => RpcOutcome::Success, + Err(err) => RpcOutcome::Error(get_error_kind(err)), }; record_rpc_outcome(namespace, method, outcome); @@ -219,6 +218,27 @@ pub trait RpcHandler: Sized { async fn handle(&self, context: RpcApiContext) -> Result; } +fn get_error_kind(err: &RpcErr) -> String { + match err { + 
RpcErr::MethodNotFound(_) => "MethodNotFound", + RpcErr::WrongParam(_) => "WrongParam", + RpcErr::BadParams(_) => "BadParams", + RpcErr::MissingParam(_) => "MissingParam", + RpcErr::TooLargeRequest => "TooLargeRequest", + RpcErr::BadHexFormat(_) => "BadHexFormat", + RpcErr::UnsuportedFork(_) => "UnsuportedFork", + RpcErr::Internal(_) => "Internal", + RpcErr::Vm(_) => "Vm", + RpcErr::Revert { .. } => "Revert", + RpcErr::Halt { .. } => "Halt", + RpcErr::AuthenticationError(_) => "AuthenticationError", + RpcErr::InvalidForkChoiceState(_) => "InvalidForkChoiceState", + RpcErr::InvalidPayloadAttributes(_) => "InvalidPayloadAttributes", + RpcErr::UnknownPayload(_) => "UnknownPayload", + } + .to_string() +} + pub const FILTER_DURATION: Duration = { if cfg!(test) { Duration::from_secs(1) diff --git a/docs/developers/l1/dashboards.md b/docs/developers/l1/dashboards.md index cb34dc55f4a..27a1d68a991 100644 --- a/docs/developers/l1/dashboards.md +++ b/docs/developers/l1/dashboards.md @@ -94,21 +94,21 @@ Collapsed row that surfaces the `namespace="engine"` Prometheus timers so you ca ![Engine API row](img/engine_api_row.png) -### Engine Success/Error Rate -Shows the rate of successful vs. failed Engine API requests per second. +### Engine Total Time per Method +Pie chart that shows where Engine time is spent across methods over the selected range. Quickly surfaces which endpoints dominate total processing time. -![Engine Success/Error Rate](img/engine_success_error_rate.png) - -### Engine Request Rate by Method -Shows how many Engine API calls per second we process, split by JSON-RPC method and averaged across the currently selected dashboard range. - -![Engine Request Rate by Method](img/engine_request_rate_by_method.png) +![Engine Total Time per Method](img/engine_total_time_per_method.png) ### Engine Latency by Methods (Avg Duration) Bar gauge of the historical average latency per Engine method over the selected time range. 
![Engine Latency by Methods](img/engine_latency_by_methods.png) +### Engine Request Rate by Method +Shows how many Engine API calls per second we process, split by JSON-RPC method and averaged across the currently selected dashboard range. + +![Engine Request Rate by Method](img/engine_request_rate_by_method.png) + ### Engine Latency by Method Live timeseries that tries to correlate to the per-block execution time by showing real-time latency per Engine method with an 18 s lookback window. @@ -122,11 +122,6 @@ Another collapsed row focused on the public JSON-RPC surface (`namespace="rpc"`) ![RPC API row](img/rpc_api_row.png) -### RPC Success/Error Rate -Shows the rate of successful vs. failed RPC API requests per second. - -![RPC Success/Error Rate](img/rpc_success_error_rate.png) - ### RPC Total Time per Method Pie chart that shows where RPC time is spent across methods over the selected range. Quickly surfaces which endpoints dominate total processing time. @@ -149,6 +144,28 @@ Live timeseries that tries to correlate to the per-block execution time by showi _**Limitations**: The RPC latency views inherit the same windowing caveats as the Engine charts: averages use the dashboard time range while the live chart relies on an 18 s window._ +## Engine and RPC Error rates + +Collapsed row showing error rates for both the Engine and RPC APIs side by side, plus a disaggregated panel that breaks errors down by method and error kind. Each panel repeats per instance so that behaviour can be compared across nodes. + +![Engine and RPC Error rates row](img/engine_and_rpc_error_rates_row.png) + +### Engine Success/Error Rate +Shows the rate of successful vs. failed Engine API requests per second. + +![Engine Success/Error Rate](img/engine_success_error_rate.png) + +### RPC Success/Error Rate +Shows the rate of successful vs. failed RPC API requests per second.
+ +![RPC Success/Error Rate](img/rpc_success_error_rate.png) + +### Engine and RPC Errors % by Method and Kind + +Disaggregated view of error percentages split by method and error kind for both Engine and RPC APIs. Percentages are calculated against the total requests for each method, so the per-kind error percentages for a method sum to that method's overall error percentage. + +![Engine and RPC Errors % by Method and Kind](img/engine_and_rpc_errors_by_method_and_kind.png) + ## Process and server info Row panels showing process-level and host-level metrics to help you monitor resource usage and spot potential issues. diff --git a/docs/developers/l1/img/engine_and_rpc_error_rates_row.png b/docs/developers/l1/img/engine_and_rpc_error_rates_row.png new file mode 100644 index 00000000000..e1b514d03ad Binary files /dev/null and b/docs/developers/l1/img/engine_and_rpc_error_rates_row.png differ diff --git a/docs/developers/l1/img/engine_and_rpc_errors_by_method_and_kind.png b/docs/developers/l1/img/engine_and_rpc_errors_by_method_and_kind.png new file mode 100644 index 00000000000..b0530c2e887 Binary files /dev/null and b/docs/developers/l1/img/engine_and_rpc_errors_by_method_and_kind.png differ diff --git a/docs/developers/l1/img/engine_api_row.png b/docs/developers/l1/img/engine_api_row.png index 56eea15052a..5cecbea776c 100644 Binary files a/docs/developers/l1/img/engine_api_row.png and b/docs/developers/l1/img/engine_api_row.png differ diff --git a/docs/developers/l1/img/engine_success_error_rate.png b/docs/developers/l1/img/engine_success_error_rate.png index ff3ee19a793..b5955bc5bc1 100644 Binary files a/docs/developers/l1/img/engine_success_error_rate.png and b/docs/developers/l1/img/engine_success_error_rate.png differ diff --git a/docs/developers/l1/img/engine_total_time_per_method.png b/docs/developers/l1/img/engine_total_time_per_method.png new file mode 100644 index 00000000000..5e7c88745c9 Binary files /dev/null and
b/docs/developers/l1/img/engine_total_time_per_method.png differ diff --git a/docs/developers/l1/img/rpc_api_row.png b/docs/developers/l1/img/rpc_api_row.png index 6a445acb0b3..2fa0b4d5a2a 100644 Binary files a/docs/developers/l1/img/rpc_api_row.png and b/docs/developers/l1/img/rpc_api_row.png differ diff --git a/docs/developers/l1/img/rpc_success_error_rate.png b/docs/developers/l1/img/rpc_success_error_rate.png index 8f9b21fd320..1b6b4940120 100644 Binary files a/docs/developers/l1/img/rpc_success_error_rate.png and b/docs/developers/l1/img/rpc_success_error_rate.png differ diff --git a/docs/developers/l1/img/rpc_time_per_method.png b/docs/developers/l1/img/rpc_time_per_method.png deleted file mode 100644 index f41c7c5b6ec..00000000000 Binary files a/docs/developers/l1/img/rpc_time_per_method.png and /dev/null differ diff --git a/docs/developers/l1/img/rpc_total_time_per_method.png b/docs/developers/l1/img/rpc_total_time_per_method.png new file mode 100644 index 00000000000..3aa49aa7a30 Binary files /dev/null and b/docs/developers/l1/img/rpc_total_time_per_method.png differ diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json index 6287e55b958..dadfdc85dc2 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json @@ -1212,7 +1212,7 @@ "h": 13, "w": 6, "x": 0, - "y": 27 + "y": 83 }, "id": 18, "options": { @@ -1325,7 +1325,7 @@ "h": 13, "w": 6, "x": 6, - "y": 27 + "y": 83 }, "id": 63, "options": { @@ -1453,7 +1453,7 @@ "h": 13, "w": 12, "x": 12, - "y": 27 + "y": 83 }, "id": 61, "interval": "5s", @@ -1597,186 +1597,54 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "description": "Total time spent per method for all RPC endpoints", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - 
"axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Requests/sec", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 15, - "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 4, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "showValues": false, - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" } }, "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - } - ] - }, - "unit": "reqps" + "unit": "s" }, "overrides": [] }, "gridPos": { "h": 14, - "w": 4, + "w": 5, "x": 0, - "y": 28 + "y": 97 }, - "id": 116, + "id": 118, "options": { + "displayLabels": [], "legend": { - "calcs": [ - "mean", - "max" - ], "displayMode": "table", - "placement": "bottom", + "placement": "right", "showLegend": true, - "sortBy": "Mean", - "sortDesc": true - }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "12.2.1", - "repeat": "instance", - "repeatDirection": "v", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum by (outcome) (rate(rpc_requests_total{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[$__range]))", - "legendFormat": "{{outcome}}", - "range": true, - "refId": "A" - } - ], - "title": "Engine Success/Error Rate - $instance", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": 
"auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "showValues": false, - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "reqps" + "values": [ + "percent" + ] }, - "overrides": [] - }, - "gridPos": { - "h": 14, - "w": 5, - "x": 4, - "y": 28 - }, - "id": 113, - "options": { - "legend": { + "pieType": "pie", + "reduceOptions": { "calcs": [ - "mean", - "max" + "sum" ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true + "fields": "", + "values": false }, + "sort": "desc", "tooltip": { "hideZeros": false, - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, "pluginVersion": "12.2.1", @@ -1789,15 +1657,15 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (method) (\n rate(rpc_request_duration_seconds_count{\n job=\"$job\",\n instance=~\"$instance(:\\\\d+)?$\",\n namespace=\"engine\"\n }[$__range])\n)", - "instant": false, + "expr": "sum by (method) (increase(rpc_request_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[$__range]))", + "instant": true, "legendFormat": "{{method}}", - "range": true, + "range": false, "refId": "A" } ], - "title": "Engine Request Rate by Method - $instance", - "type": "timeseries" + "title": "Engine Total Time per Method - $instance", + "type": "piechart" }, { "datasource": { @@ -1831,8 +1699,8 @@ "gridPos": { "h": 14, "w": 3, - "x": 9, - "y": 28 + "x": 5, + "y": 97 }, "id": 115, 
"interval": "10s", @@ -1887,7 +1755,6 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "description": "", "fieldConfig": { "defaults": { "color": { @@ -1897,12 +1764,12 @@ "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", - "axisLabel": "Duration", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1911,17 +1778,14 @@ }, "insertNulls": false, "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, - "showPoints": "always", + "showPoints": "never", "showValues": false, - "spanNulls": true, + "spanNulls": false, "stacking": { "group": "A", "mode": "none" @@ -1940,32 +1804,31 @@ } ] }, - "unit": "s" + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 14, - "w": 12, - "x": 12, - "y": 28 + "w": 4, + "x": 8, + "y": 97 }, - "id": 112, - "interval": "10s", + "id": 113, "options": { "legend": { "calcs": [ "mean", "max" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "desc" + "mode": "single", + "sort": "none" } }, "pluginVersion": "12.2.1", @@ -1978,37 +1841,22 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "exemplar": false, - "expr": "sum by (method) (increase(rpc_request_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[18s])) \n/ sum by (method) (increase(rpc_request_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[18s]))", + "expr": "sum by (method) (\n rate(rpc_request_duration_seconds_count{\n job=\"$job\",\n instance=~\"$instance(:\\\\d+)?$\",\n namespace=\"engine\"\n }[$__range])\n)", "instant": false, - "interval": "", - "legendFormat": "__auto", + "legendFormat": 
"{{method}}", "range": true, "refId": "A" } ], - "title": "Engine Latency by Method - $instance", + "title": "Engine Request Rate by Method - $instance", "type": "timeseries" - } - ], - "title": "Engine API", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 28 - }, - "id": 100, - "panels": [ + }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "description": "", "fieldConfig": { "defaults": { "color": { @@ -2018,12 +1866,12 @@ "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", - "axisLabel": "Requests/sec", + "axisLabel": "Duration", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 15, + "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2032,17 +1880,20 @@ }, "insertNulls": false, "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, "lineWidth": 2, - "pointSize": 4, + "pointSize": 5, "scaleDistribution": { "type": "linear" }, - "showPoints": "never", + "showPoints": "always", "showValues": false, "spanNulls": true, "stacking": { "group": "A", - "mode": "normal" + "mode": "none" }, "thresholdsStyle": { "mode": "off" @@ -2058,28 +1909,27 @@ } ] }, - "unit": "reqps" + "unit": "s" }, "overrides": [] }, "gridPos": { "h": 14, - "w": 4, - "x": 0, - "y": 29 + "w": 12, + "x": 12, + "y": 97 }, - "id": 107, + "id": 112, + "interval": "10s", "options": { "legend": { "calcs": [ "mean", "max" ], - "displayMode": "table", + "displayMode": "list", "placement": "bottom", - "showLegend": true, - "sortBy": "Mean", - "sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, @@ -2097,15 +1947,32 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (outcome) (rate(rpc_requests_total{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__range]))", - "legendFormat": "{{outcome}}", - "range": true, - "refId": "A" - } - ], - "title": "RPC 
Success/Error Rate - $instance", + "exemplar": false, + "expr": "sum by (method) (increase(rpc_request_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[18s])) \n/ sum by (method) (increase(rpc_request_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[18s]))", + "instant": false, + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Engine Latency by Method - $instance", "type": "timeseries" - }, + } + ], + "title": "Engine API", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 100, + "panels": [ { "datasource": { "type": "prometheus", @@ -2132,8 +1999,8 @@ "gridPos": { "h": 14, "w": 7, - "x": 4, - "y": 29 + "x": 0, + "y": 112 }, "id": 103, "options": { @@ -2240,8 +2107,8 @@ "gridPos": { "h": 14, "w": 3, - "x": 11, - "y": 29 + "x": 7, + "y": 112 }, "id": 104, "options": { @@ -2353,9 +2220,9 @@ }, "gridPos": { "h": 14, - "w": 4, - "x": 14, - "y": 29 + "w": 5, + "x": 10, + "y": 112 }, "id": 101, "options": { @@ -2455,9 +2322,9 @@ }, "gridPos": { "h": 14, - "w": 6, - "x": 18, - "y": 29 + "w": 9, + "x": 15, + "y": 112 }, "id": 102, "options": { @@ -2500,165 +2367,521 @@ "type": "row" }, { - "collapsed": false, + "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 29 }, - "id": 48, - "panels": [], - "title": "Process & Server Info", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" + "id": 117, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": 
"Requests/sec", + "axisPlacement": "auto", + "axisSoftMax": 0.3, + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "showValues": false, + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ { - "color": "green", - "value": 0 + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] }, { - "color": "red", - "value": 86400 + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] } ] }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 5, - "x": 0, - "y": 30 - }, - "id": 34, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" + "gridPos": { + "h": 14, + "w": 6, + "x": 0, + "y": 86 + }, + "id": 116, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.1", + "repeat": "instance", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + 
"editorMode": "code", + "expr": "sum by (outcome) (rate(rpc_requests_total{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[$__range]))", + "legendFormat": "{{outcome}}", + "range": true, + "refId": "A" + } ], - "fields": "", - "values": false + "title": "Engine Success/Error Rate - $instance", + "type": "timeseries" }, - "showPercentChange": false, - "textMode": "value", - "wideLayout": true - }, - "pluginVersion": "12.2.1", - "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", - "expr": "time() - process_start_time_seconds{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\"}", - "legendFormat": "uptime_s", - "range": true, - "refId": "A" - } - ], - "title": "Uptime", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "color": "red", - "value": 1000 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 5, - "y": 30 - }, - "id": 33, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Requests/sec", + "axisPlacement": "auto", + "axisSoftMax": 0.7, + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + 
"pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "showValues": false, + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 14, + "w": 6, + "x": 6, + "y": 86 + }, + "id": 107, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.1", + "repeat": "instance", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (outcome) (rate(rpc_requests_total{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__range]))", + "legendFormat": "{{outcome}}", + "range": true, + "refId": "A" + } + ], + "title": "RPC Success/Error Rate - $instance", + "type": "timeseries" }, - "showPercentChange": false, - "textMode": "value", - "wideLayout": true - }, - "pluginVersion": "12.2.1", - "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", - "expr": "process_threads{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\"}", - "legendFormat": "threads", - "range": true, - "refId": "A" + "description": "This shows error % by kind in the total requests 
for a particular method, ie:\n- 50 request to method_a\n- 5 internal errors on method_a\n- 12 timeout errors on method_a\n\nResult:\n- 10% of method_a - % of internal error\n- 24% of method_a - % of timeout error ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Requests", + "axisPlacement": "right", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "showValues": false, + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*%/" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "left" + }, + { + "id": "unit", + "value": "percent" + }, + { + "id": "custom.axisLabel", + "value": "Error %" + } + ] + }, + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "engine_getPayloadBodiesByRangeV1 - % of Internal error" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": true, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 14, + "w": 12, + "x": 12, + "y": 86 + }, + "id": 108, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": 
{ + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.1", + "repeat": "instance", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (method, error_kind) (\n increase(rpc_requests_total{\n job=\"$job\",\n instance=~\"$instance(:\\\\d+)?$\",\n outcome=\"error\"\n }[$__range])\n)", + "hide": true, + "instant": false, + "legendFormat": "{{method}} - {{error_kind}} error", + "range": true, + "refId": "errors_by_method_kind" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (method) (\n increase(rpc_requests_total{\n job=\"$job\",\n instance=~\"$instance(:\\\\d+)?$\"\n }[$__range])\n)", + "hide": true, + "instant": false, + "legendFormat": "{{method}} - Total requests", + "range": true, + "refId": "total_by_method" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "100 *\nsum by (method, error_kind) (\n increase(rpc_requests_total{\n job=\"$job\",\n instance=~\"$instance(:\\\\d+)?$\",\n outcome=\"error\"\n }[$__range])\n)\n/\non (method) group_left\nsum by (method) (\n increase(rpc_requests_total{\n job=\"$job\",\n instance=~\"$instance(:\\\\d+)?$\"\n }[$__range])\n)", + "hide": false, + "instant": false, + "legendFormat": "{{method}} - % of {{error_kind}} error", + "range": true, + "refId": "error_pct_by_method_kind" + } + ], + "title": "Engine & RPC Errors % by Method & Kind - $instance", + "type": "timeseries" } ], - "title": "Threads", - "type": "stat" + "title": "Engine and RPC Error rates", + "type": "row" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 48, + "panels": [], + "title": "Process & Server Info", + "type": "row" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "description": "", 
"fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { @@ -2668,9 +2891,13 @@ "color": "green", "value": 0 }, + { + "color": "yellow", + "value": 60 + }, { "color": "red", - "value": 1000 + "value": 85 } ] } @@ -2678,28 +2905,27 @@ "overrides": [] }, "gridPos": { - "h": 3, - "w": 5, - "x": 7, - "y": 30 + "h": 5, + "w": 12, + "x": 0, + "y": 31 }, - "id": 43, + "id": 32, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "mean", + "max" ], - "fields": "", - "values": false + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "showPercentChange": false, - "textMode": "value", - "wideLayout": true + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } }, "pluginVersion": "12.2.1", "targets": [ @@ -2710,13 +2936,13 @@ }, "editorMode": "code", "expr": "process_open_fds{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\"}", - "legendFormat": "Open FDs", + "legendFormat": "{{ instance }} FDs used", "range": true, "refId": "A" } ], - "title": "Open FDs", - "type": "stat" + "title": "Open FDs Historic", + "type": "timeseries" }, { "datasource": { @@ -2785,7 +3011,7 @@ 
"h": 8, "w": 12, "x": 12, - "y": 30 + "y": 31 }, "id": 46, "options": { @@ -2827,45 +3053,79 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "description": "", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "mode": "thresholds" }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "showValues": false, - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 86400 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 5, + "x": 0, + "y": 36 + }, + "id": 34, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "time() - process_start_time_seconds{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\"}", + "legendFormat": "uptime_s", + "range": true, + "refId": "A" + } + ], + "title": "Uptime", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + 
"mode": "thresholds" }, "mappings": [], "thresholds": { @@ -2876,12 +3136,77 @@ "value": 0 }, { - "color": "yellow", - "value": 60 + "color": "red", + "value": 1000 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 5, + "y": 36 + }, + "id": 33, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "process_threads{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\"}", + "legendFormat": "threads", + "range": true, + "refId": "A" + } + ], + "title": "Threads", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 }, { "color": "red", - "value": 85 + "value": 1000 } ] } @@ -2889,27 +3214,28 @@ "overrides": [] }, "gridPos": { - "h": 5, - "w": 12, - "x": 0, - "y": 33 + "h": 3, + "w": 5, + "x": 7, + "y": 36 }, - "id": 32, + "id": 43, "options": { - "legend": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { "calcs": [ - "mean", - "max" + "lastNotNull" ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "fields": "", + "values": false }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } + "showPercentChange": false, + "textMode": "value", + "wideLayout": true }, "pluginVersion": "12.2.1", "targets": [ @@ -2920,13 
+3246,13 @@ }, "editorMode": "code", "expr": "process_open_fds{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\"}", - "legendFormat": "{{ instance }} FDs used", + "legendFormat": "Open FDs", "range": true, "refId": "A" } ], - "title": "Open FDs Historic", - "type": "timeseries" + "title": "Open FDs", + "type": "stat" }, { "datasource": { @@ -2995,7 +3321,7 @@ "h": 6, "w": 12, "x": 0, - "y": 38 + "y": 39 }, "id": 30, "options": { @@ -3123,7 +3449,7 @@ "h": 6, "w": 12, "x": 12, - "y": 38 + "y": 39 }, "id": 31, "options": { @@ -3227,7 +3553,7 @@ "h": 8, "w": 12, "x": 0, - "y": 44 + "y": 45 }, "id": 45, "options": { @@ -3366,7 +3692,7 @@ "h": 8, "w": 12, "x": 12, - "y": 44 + "y": 45 }, "id": 42, "options": { @@ -3416,7 +3742,7 @@ "h": 1, "w": 24, "x": 0, - "y": 52 + "y": 53 }, "id": 55, "panels": [ @@ -3486,7 +3812,7 @@ "h": 8, "w": 6, "x": 0, - "y": 188 + "y": 388 }, "id": 56, "options": { @@ -3585,7 +3911,7 @@ "h": 8, "w": 9, "x": 6, - "y": 188 + "y": 388 }, "id": 57, "options": { @@ -3685,7 +4011,7 @@ "h": 8, "w": 9, "x": 15, - "y": 188 + "y": 388 }, "id": 58, "options": { @@ -3807,5 +4133,5 @@ "timezone": "utc", "title": "Ethrex L1 - Perf Dashboard", "uid": "beoru4vp59yiof", - "version": 41 + "version": 42 }