lambdaclass · rodrigo-o · Nov 21, 2025 · Nov 18, 2025 · Nov 20, 2025 · Nov 20, 2025
diff --git a/crates/blockchain/metrics/rpc.rs b/crates/blockchain/metrics/rpc.rs
@@ -14,7 +14,7 @@ fn initialize_rpc_outcomes_counter() -> CounterVec {
     register_counter_vec!(
         "rpc_requests_total",
         "Total number of RPC requests partitioned by namespace, method, and outcome",
-        &["namespace", "method", "outcome"],
+        &["namespace", "method", "outcome", "error_kind"],
     )
     .unwrap()
 }
@@ -29,24 +29,31 @@ fn initialize_rpc_duration_histogram() -> HistogramVec {
 }
 
 /// Represents the outcome of an RPC request when recording metrics.
-#[derive(Clone, Copy)]
+#[derive(Clone)]
 pub enum RpcOutcome {
     Success,
-    Error,
+    Error(String),
 }
 
 impl RpcOutcome {
     fn as_label(&self) -> &'static str {
         match self {
             RpcOutcome::Success => "success",
-            RpcOutcome::Error => "error",
+            RpcOutcome::Error(_) => "error",
+        }
+    }
+
+    fn error_kind(&self) -> &str {
+        match self {
+            RpcOutcome::Success => "",
+            RpcOutcome::Error(kind) => kind,
         }
     }
 }
 
 pub fn record_rpc_outcome(namespace: &str, method: &str, outcome: RpcOutcome) {
     METRICS_RPC_REQUEST_OUTCOMES
-        .with_label_values(&[namespace, method, outcome.as_label()])
+        .with_label_values(&[namespace, method, outcome.as_label(), outcome.error_kind()])
         .inc();
 }
 

diff --git a/crates/networking/rpc/rpc.rs b/crates/networking/rpc/rpc.rs
@@ -206,10 +206,9 @@ pub trait RpcHandler: Sized {
             )
             .await;
 
-        let outcome = if result.is_ok() {
-            RpcOutcome::Success
-        } else {
-            RpcOutcome::Error
+        let outcome = match &result {
+            Ok(_) => RpcOutcome::Success,
+            Err(err) => RpcOutcome::Error(get_error_kind(err)),
         };
         record_rpc_outcome(namespace, method, outcome);
 
@@ -219,6 +218,27 @@ pub trait RpcHandler: Sized {
     async fn handle(&self, context: RpcApiContext) -> Result<Value, RpcErr>;
 }
 
+fn get_error_kind(err: &RpcErr) -> String {
+    match err {
+        RpcErr::MethodNotFound(_) => "MethodNotFound",
+        RpcErr::WrongParam(_) => "WrongParam",
+        RpcErr::BadParams(_) => "BadParams",
+        RpcErr::MissingParam(_) => "MissingParam",
+        RpcErr::TooLargeRequest => "TooLargeRequest",
+        RpcErr::BadHexFormat(_) => "BadHexFormat",
+        RpcErr::UnsuportedFork(_) => "UnsuportedFork",
+        RpcErr::Internal(_) => "Internal",
+        RpcErr::Vm(_) => "Vm",
+        RpcErr::Revert { .. } => "Revert",
+        RpcErr::Halt { .. } => "Halt",
+        RpcErr::AuthenticationError(_) => "AuthenticationError",
+        RpcErr::InvalidForkChoiceState(_) => "InvalidForkChoiceState",
+        RpcErr::InvalidPayloadAttributes(_) => "InvalidPayloadAttributes",
+        RpcErr::UnknownPayload(_) => "UnknownPayload",
+    }
+    .to_string()
+}
+
 pub const FILTER_DURATION: Duration = {
     if cfg!(test) {
         Duration::from_secs(1)

diff --git a/docs/developers/l1/dashboards.md b/docs/developers/l1/dashboards.md
@@ -94,21 +94,21 @@ Collapsed row that surfaces the `namespace="engine"` Prometheus timers so you ca
 
 ![Engine API row](img/engine_api_row.png)
 
-### Engine Success/Error Rate
-Shows the rate of successful vs. failed Engine API requests per second.
+### Engine Total Time per Method
+Pie chart that shows where Engine time is spent across methods over the selected range. Quickly surfaces which endpoints dominate total processing time.
 
-![Engine Success/Error Rate](img/engine_success_error_rate.png)
-
-### Engine Request Rate by Method
-Shows how many Engine API calls per second we process, split by JSON-RPC method and averaged across the currently selected dashboard range.
-
-![Engine Request Rate by Method](img/engine_request_rate_by_method.png)
+![Engine Total Time per Method](img/engine_total_time_per_method.png)
 
 ### Engine Latency by Methods (Avg Duration)
 Bar gauge of the historical average latency per Engine method over the selected time range.
 
 ![Engine Latency by Methods](img/engine_latency_by_methods.png)
 
+### Engine Request Rate by Method
+Shows how many Engine API calls per second we process, split by JSON-RPC method and averaged across the currently selected dashboard range.
+
+![Engine Request Rate by Method](img/engine_request_rate_by_method.png)
+
 ### Engine Latency by Method
 Live timeseries that tries to correlate to the per-block execution time by showing real-time latency per Engine method with an 18 s lookback window.
 
@@ -122,11 +122,6 @@ Another collapsed row focused on the public JSON-RPC surface (`namespace="rpc"`)
 
 ![RPC API row](img/rpc_api_row.png)
 
-### RPC Success/Error Rate
-Shows the rate of successful vs. failed RPC API requests per second.
-
-![RPC Success/Error Rate](img/rpc_success_error_rate.png)
-
 ### RPC Total Time per Method
 Pie chart that shows where RPC time is spent across methods over the selected range. Quickly surfaces which endpoints dominate total processing time.
 
@@ -149,6 +144,28 @@ Live timeseries that tries to correlate to the per-block execution time by showi
 
 _**Limitations**: The RPC latency views inherit the same windowing caveats as the Engine charts: averages use the dashboard time range while the live chart relies on an 18 s window._
 
+## Engine and RPC Error rates
+
+Collapsed row showing the error rates for the Engine and RPC APIs side by side, plus a disaggregated panel that breaks errors down by method and error kind. Each panel repeats per instance so that behaviour can be compared across nodes.
+
+![Engine and RPC Error rates row](img/engine_and_rpc_error_rates_row.png)
+
+### Engine Success/Error Rate
+Shows the rate of successful vs. failed Engine API requests per second.
+
+![Engine Success/Error Rate](img/engine_success_error_rate.png)
+
+### RPC Success/Error Rate
+Shows the rate of successful vs. failed RPC API requests per second.
+
+![RPC Success/Error Rate](img/rpc_success_error_rate.png)
+
+### Engine and RPC Errors % by Method and Kind
+
+Disaggregated view of error percentages, split by method and error kind, for both the Engine and RPC APIs. Percentages are calculated against the total number of requests for a given method, so the percentages of all error kinds for a method should sum to that method's overall error percentage.
+
+![Engine and RPC Errors % by Method and Kind](img/engine_and_rpc_errors_by_method_and_kind.png)
+
 ## Process and server info
 
 Row panels showing process-level and host-level metrics to help you monitor resource usage and spot potential issues.

diff --git a/docs/developers/l1/img/engine_and_rpc_error_rates_row.png b/docs/developers/l1/img/engine_and_rpc_error_rates_row.png
diff --git a/docs/developers/l1/img/engine_and_rpc_errors_by_method_and_kind.png b/docs/developers/l1/img/engine_and_rpc_errors_by_method_and_kind.png
diff --git a/docs/developers/l1/img/engine_api_row.png b/docs/developers/l1/img/engine_api_row.png
diff --git a/docs/developers/l1/img/engine_success_error_rate.png b/docs/developers/l1/img/engine_success_error_rate.png
diff --git a/docs/developers/l1/img/engine_total_time_per_method.png b/docs/developers/l1/img/engine_total_time_per_method.png
diff --git a/docs/developers/l1/img/rpc_api_row.png b/docs/developers/l1/img/rpc_api_row.png
diff --git a/docs/developers/l1/img/rpc_success_error_rate.png b/docs/developers/l1/img/rpc_success_error_rate.png
diff --git a/docs/developers/l1/img/rpc_time_per_method.png b/docs/developers/l1/img/rpc_time_per_method.png
diff --git a/docs/developers/l1/img/rpc_total_time_per_method.png b/docs/developers/l1/img/rpc_total_time_per_method.png