From 8e6dc0b43e631ba41bac3ca02a76ffcd75e074ef Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Fri, 7 Nov 2025 19:01:17 -0300 Subject: [PATCH 01/28] add namespace to profiling --- crates/blockchain/metrics/profiling.rs | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/crates/blockchain/metrics/profiling.rs b/crates/blockchain/metrics/profiling.rs index cd90977d365..2b05eaa5491 100644 --- a/crates/blockchain/metrics/profiling.rs +++ b/crates/blockchain/metrics/profiling.rs @@ -11,8 +11,8 @@ pub static METRICS_BLOCK_PROCESSING_PROFILE: LazyLock = fn initialize_histogram_vec() -> HistogramVec { register_histogram_vec!( "function_duration_seconds", - "Histogram of the run time of the functions in block processing", - &["function_name"] + "Histogram of the run time of the functions in block processing and RPC handling", + &["namespace", "function_name"] ) .unwrap() } @@ -34,9 +34,26 @@ where && span.metadata().target().starts_with("ethrex") { let name = span.metadata().name(); + let target = span.metadata().target(); + + // Determine namespace based on the module target + // ethrex_networking::rpc -> "rpc" + // ethrex_blockchain -> "block_processing" + // ethrex_storage -> "storage" + let namespace = if target.contains("::rpc") { + "rpc" + } else if target.contains("blockchain") { + "block_processing" + } else if target.contains("storage") { + "storage" + } else if target.contains("networking") { + "networking" + } else { + "other" + }; let timer = METRICS_BLOCK_PROCESSING_PROFILE - .with_label_values(&[name]) + .with_label_values(&[namespace, name]) .start_timer(); // PERF: `extensions_mut` uses a Mutex internally (per span) span.extensions_mut().insert(ProfileTimer(timer)); From 5b5e3d65ba2d3692c8d9eaec38328df96c01e06b Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Mon, 10 Nov 2025 17:24:34 -0300 Subject: [PATCH 02/28] initial testing of instumentation --- crates/blockchain/metrics/profiling.rs | 46 ++++++++- crates/networking/rpc/rpc.rs | 125 +++++++++++++++---------- 2 files changed, 116 insertions(+), 55 deletions(-) diff --git a/crates/blockchain/metrics/profiling.rs b/crates/blockchain/metrics/profiling.rs index 2b05eaa5491..89950446912 100644 --- a/crates/blockchain/metrics/profiling.rs +++ b/crates/blockchain/metrics/profiling.rs @@ -1,6 +1,6 @@ use prometheus::{Encoder, HistogramTimer, HistogramVec, TextEncoder, register_histogram_vec}; use std::sync::LazyLock; -use tracing::{Subscriber, span::Id}; +use tracing::{Subscriber, span::{Attributes, Id}, field::{Field, Visit}}; use tracing_subscriber::{Layer, layer::Context, registry::LookupSpan}; use crate::MetricsError; @@ -25,15 +25,47 @@ pub struct FunctionProfilingLayer; /// Wrapper around [`HistogramTimer`] to avoid conflicts with other layers struct ProfileTimer(HistogramTimer); +/// Visitor to extract the 'method' field from RPC spans for more granular profiling +#[derive(Default)] +struct MethodVisitor { + method: Option, +} + +impl Visit for MethodVisitor { + fn record_str(&mut self, field: &Field, value: &str) { + if field.name() == "method" { + self.method = Some(value.to_string()); + } + } + + fn record_debug(&mut self, field: &Field, value: &dyn std::fmt::Debug) { + if field.name() == "method" { + self.method = Some(format!("{:?}", value).trim_matches('"').to_string()); + } + } +} + impl Layer for FunctionProfilingLayer where S: Subscriber + for<'a> LookupSpan<'a>, { + fn on_new_span(&self, attrs: &Attributes<'_>, id: &Id, ctx: Context<'_, S>) { + // Extract the 'method' field if present (used by RPC instrumentation) + let mut visitor = MethodVisitor::default(); + attrs.record(&mut visitor); + + if let Some(method_name) = visitor.method { + // Store the method name in span extensions for later use + if let Some(span) = ctx.span(id) { + span.extensions_mut().insert(method_name); + } + } + } + fn on_enter(&self, id: &Id, ctx: Context<'_, S>) { if let Some(span) = ctx.span(id) && span.metadata().target().starts_with("ethrex") { - let name = span.metadata().name(); let target = span.metadata().target(); // Determine namespace based on the module target @@ -52,8 +84,16 @@ where "other" }; + // Check if we have a stored method name (from RPC middleware) + // Otherwise fall back to the span name + let function_name = span + .extensions() + .get::() + .map(|s| s.clone()) + .unwrap_or_else(|| span.metadata().name().to_string()); + let timer = METRICS_BLOCK_PROCESSING_PROFILE - .with_label_values(&[namespace, name]) + .with_label_values(&[namespace, &function_name]) .start_timer(); // PERF: `extensions_mut` uses a Mutex internally (per span) span.extensions_mut().insert(ProfileTimer(timer)); diff --git a/crates/networking/rpc/rpc.rs b/crates/networking/rpc/rpc.rs index d61a81a7c59..c5b7c54c67b 100644 --- a/crates/networking/rpc/rpc.rs +++ b/crates/networking/rpc/rpc.rs @@ -195,6 +195,27 @@ pub trait RpcHandler: Sized { async fn handle(&self, context: RpcApiContext) -> Result; } +/// Middleware wrapper that instruments RPC method calls with tracing spans for profiling. +/// The profiling layer will automatically detect this is in the RPC namespace and set +/// the appropriate metrics labels: namespace="rpc", function_name= +#[inline] +async fn instrumented_call( + method: &str, + req: &RpcRequest, + context: RpcApiContext, +) -> Result { + // Create a span with the RPC method name + // The profiling layer will automatically: + // 1. Detect this is in ethrex_networking::rpc + // 2. Set namespace="rpc" + // 3. Set function_name= + // 4. Track timing on span exit + let span = tracing::trace_span!("rpc_call", method = %method); + let _enter = span.enter(); + + T::call(req, context).await +} + pub const FILTER_DURATION: Duration = { if cfg!(test) { Duration::from_secs(1) @@ -454,36 +475,36 @@ pub async fn map_authrpc_requests( pub async fn map_eth_requests(req: &RpcRequest, context: RpcApiContext) -> Result { match req.method.as_str() { - "eth_chainId" => ChainId::call(req, context).await, - "eth_syncing" => Syncing::call(req, context).await, - "eth_getBlockByNumber" => GetBlockByNumberRequest::call(req, context).await, - "eth_getBlockByHash" => GetBlockByHashRequest::call(req, context).await, - "eth_getBalance" => GetBalanceRequest::call(req, context).await, - "eth_getCode" => GetCodeRequest::call(req, context).await, - "eth_getStorageAt" => GetStorageAtRequest::call(req, context).await, + "eth_chainId" => instrumented_call::("eth_chainId", req, context).await, + "eth_syncing" => instrumented_call::("eth_syncing", req, context).await, + "eth_getBlockByNumber" => instrumented_call::("eth_getBlockByNumber", req, context).await, + "eth_getBlockByHash" => instrumented_call::("eth_getBlockByHash", req, context).await, + "eth_getBalance" => instrumented_call::("eth_getBalance", req, context).await, + "eth_getCode" => instrumented_call::("eth_getCode", req, context).await, + "eth_getStorageAt" => instrumented_call::("eth_getStorageAt", req, context).await, "eth_getBlockTransactionCountByNumber" => { - GetBlockTransactionCountRequest::call(req, context).await + instrumented_call::("eth_getBlockTransactionCountByNumber", req, context).await } "eth_getBlockTransactionCountByHash" => { - GetBlockTransactionCountRequest::call(req, context).await + instrumented_call::("eth_getBlockTransactionCountByHash", req, context).await } "eth_getTransactionByBlockNumberAndIndex" => { - GetTransactionByBlockNumberAndIndexRequest::call(req, context).await + instrumented_call::("eth_getTransactionByBlockNumberAndIndex", req, context).await } "eth_getTransactionByBlockHashAndIndex" => { - GetTransactionByBlockHashAndIndexRequest::call(req, context).await + instrumented_call::("eth_getTransactionByBlockHashAndIndex", req, context).await } - "eth_getBlockReceipts" => GetBlockReceiptsRequest::call(req, context).await, - "eth_getTransactionByHash" => GetTransactionByHashRequest::call(req, context).await, - "eth_getTransactionReceipt" => GetTransactionReceiptRequest::call(req, context).await, - "eth_createAccessList" => CreateAccessListRequest::call(req, context).await, - "eth_blockNumber" => BlockNumberRequest::call(req, context).await, - "eth_call" => CallRequest::call(req, context).await, - "eth_blobBaseFee" => GetBlobBaseFee::call(req, context).await, - "eth_getTransactionCount" => GetTransactionCountRequest::call(req, context).await, - "eth_feeHistory" => FeeHistoryRequest::call(req, context).await, - "eth_estimateGas" => EstimateGasRequest::call(req, context).await, - "eth_getLogs" => LogsFilter::call(req, context).await, + "eth_getBlockReceipts" => instrumented_call::("eth_getBlockReceipts", req, context).await, + "eth_getTransactionByHash" => instrumented_call::("eth_getTransactionByHash", req, context).await, + "eth_getTransactionReceipt" => instrumented_call::("eth_getTransactionReceipt", req, context).await, + "eth_createAccessList" => instrumented_call::("eth_createAccessList", req, context).await, + "eth_blockNumber" => instrumented_call::("eth_blockNumber", req, context).await, + "eth_call" => instrumented_call::("eth_call", req, context).await, + "eth_blobBaseFee" => instrumented_call::("eth_blobBaseFee", req, context).await, + "eth_getTransactionCount" => instrumented_call::("eth_getTransactionCount", req, context).await, + "eth_feeHistory" => instrumented_call::("eth_feeHistory", req, context).await, + "eth_estimateGas" => instrumented_call::("eth_estimateGas", req, context).await, + "eth_getLogs" => instrumented_call::("eth_getLogs", req, context).await, "eth_newFilter" => { NewFilterRequest::stateful_call(req, context.storage, context.active_filters).await } @@ -493,26 +514,26 @@ pub async fn map_eth_requests(req: &RpcRequest, context: RpcApiContext) -> Resul "eth_getFilterChanges" => { FilterChangesRequest::stateful_call(req, context.storage, context.active_filters).await } - "eth_sendRawTransaction" => SendRawTransactionRequest::call(req, context).await, - "eth_getProof" => GetProofRequest::call(req, context).await, - "eth_gasPrice" => GasPrice::call(req, context).await, + "eth_sendRawTransaction" => instrumented_call::("eth_sendRawTransaction", req, context).await, + "eth_getProof" => instrumented_call::("eth_getProof", req, context).await, + "eth_gasPrice" => instrumented_call::("eth_gasPrice", req, context).await, "eth_maxPriorityFeePerGas" => { - eth::max_priority_fee::MaxPriorityFee::call(req, context).await + instrumented_call::("eth_maxPriorityFeePerGas", req, context).await } - "eth_config" => Config::call(req, context).await, + "eth_config" => instrumented_call::("eth_config", req, context).await, unknown_eth_method => Err(RpcErr::MethodNotFound(unknown_eth_method.to_owned())), } } pub async fn map_debug_requests(req: &RpcRequest, context: RpcApiContext) -> Result { match req.method.as_str() { - "debug_getRawHeader" => GetRawHeaderRequest::call(req, context).await, - "debug_getRawBlock" => GetRawBlockRequest::call(req, context).await, - "debug_getRawTransaction" => GetRawTransaction::call(req, context).await, - "debug_getRawReceipts" => GetRawReceipts::call(req, context).await, - "debug_executionWitness" => ExecutionWitnessRequest::call(req, context).await, - "debug_traceTransaction" => TraceTransactionRequest::call(req, context).await, - "debug_traceBlockByNumber" => TraceBlockByNumberRequest::call(req, context).await, + "debug_getRawHeader" => instrumented_call::("debug_getRawHeader", req, context).await, + "debug_getRawBlock" => instrumented_call::("debug_getRawBlock", req, context).await, + "debug_getRawTransaction" => instrumented_call::("debug_getRawTransaction", req, context).await, + "debug_getRawReceipts" => instrumented_call::("debug_getRawReceipts", req, context).await, + "debug_executionWitness" => instrumented_call::("debug_executionWitness", req, context).await, + "debug_traceTransaction" => instrumented_call::("debug_traceTransaction", req, context).await, + "debug_traceBlockByNumber" => instrumented_call::("debug_traceBlockByNumber", req, context).await, unknown_debug_method => Err(RpcErr::MethodNotFound(unknown_debug_method.to_owned())), } } @@ -522,30 +543,30 @@ pub async fn map_engine_requests( context: RpcApiContext, ) -> Result { match req.method.as_str() { - "engine_exchangeCapabilities" => ExchangeCapabilitiesRequest::call(req, context).await, - "engine_forkchoiceUpdatedV1" => ForkChoiceUpdatedV1::call(req, context).await, - "engine_forkchoiceUpdatedV2" => ForkChoiceUpdatedV2::call(req, context).await, - "engine_forkchoiceUpdatedV3" => ForkChoiceUpdatedV3::call(req, context).await, - "engine_newPayloadV4" => NewPayloadV4Request::call(req, context).await, - "engine_newPayloadV3" => NewPayloadV3Request::call(req, context).await, - "engine_newPayloadV2" => NewPayloadV2Request::call(req, context).await, - "engine_newPayloadV1" => NewPayloadV1Request::call(req, context).await, + "engine_exchangeCapabilities" => instrumented_call::("engine_exchangeCapabilities", req, context).await, + "engine_forkchoiceUpdatedV1" => instrumented_call::("engine_forkchoiceUpdatedV1", req, context).await, + "engine_forkchoiceUpdatedV2" => instrumented_call::("engine_forkchoiceUpdatedV2", req, context).await, + "engine_forkchoiceUpdatedV3" => instrumented_call::("engine_forkchoiceUpdatedV3", req, context).await, + "engine_newPayloadV4" => instrumented_call::("engine_newPayloadV4", req, context).await, + "engine_newPayloadV3" => instrumented_call::("engine_newPayloadV3", req, context).await, + "engine_newPayloadV2" => instrumented_call::("engine_newPayloadV2", req, context).await, + "engine_newPayloadV1" => instrumented_call::("engine_newPayloadV1", req, context).await, "engine_exchangeTransitionConfigurationV1" => { - ExchangeTransitionConfigV1Req::call(req, context).await + instrumented_call::("engine_exchangeTransitionConfigurationV1", req, context).await } - "engine_getPayloadV5" => GetPayloadV5Request::call(req, context).await, - "engine_getPayloadV4" => GetPayloadV4Request::call(req, context).await, - "engine_getPayloadV3" => GetPayloadV3Request::call(req, context).await, - "engine_getPayloadV2" => GetPayloadV2Request::call(req, context).await, - "engine_getPayloadV1" => GetPayloadV1Request::call(req, context).await, + "engine_getPayloadV5" => instrumented_call::("engine_getPayloadV5", req, context).await, + "engine_getPayloadV4" => instrumented_call::("engine_getPayloadV4", req, context).await, + "engine_getPayloadV3" => instrumented_call::("engine_getPayloadV3", req, context).await, + "engine_getPayloadV2" => instrumented_call::("engine_getPayloadV2", req, context).await, + "engine_getPayloadV1" => instrumented_call::("engine_getPayloadV1", req, context).await, "engine_getPayloadBodiesByHashV1" => { - GetPayloadBodiesByHashV1Request::call(req, context).await + instrumented_call::("engine_getPayloadBodiesByHashV1", req, context).await } "engine_getPayloadBodiesByRangeV1" => { - GetPayloadBodiesByRangeV1Request::call(req, context).await + instrumented_call::("engine_getPayloadBodiesByRangeV1", req, context).await } - "engine_getBlobsV1" => BlobsV1Request::call(req, context).await, - "engine_getBlobsV2" => BlobsV2Request::call(req, context).await, + "engine_getBlobsV1" => instrumented_call::("engine_getBlobsV1", req, context).await, + "engine_getBlobsV2" => instrumented_call::("engine_getBlobsV2", req, context).await, unknown_engine_method => Err(RpcErr::MethodNotFound(unknown_engine_method.to_owned())), } } From 042b26674d4663aa4c72a3913c096a07ef2bb43e Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Mon, 10 Nov 2025 17:58:50 -0300 Subject: [PATCH 03/28] remove rpc namespaced instrumentation from breakdown panels --- .../dashboards/common_dashboards/ethrex_l1_perf.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json index b8b41b0b260..8b4eeb71ade 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json @@ -1258,7 +1258,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "(\n sum by (function_name) (\n increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", function_name!~\"Block execution|Execute Block|Trie update\"}[$__range])\n )\n)\nor\nlabel_replace(\n sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", function_name=\"Block execution\"}[$__range]) )\n - sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", function_name=~\"Storage read|Account read|Account code read|Block hash read\"}[$__range]) ),\n \"function_name\",\"Block Execution (w/o reads)\",\"__name__\",\".*\"\n)\nor\nlabel_replace(\n sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", function_name=\"Execute Block\"}[$__range]) )\n - sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", function_name=\"Block execution\"}[$__range]) ),\n \"function_name\",\"Merkleization Diff\",\"__name__\",\".*\"\n)", + "expr": "(\n sum by (function_name) (\n increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name!~\"Block execution|Execute Block|Trie update\"}[$__range])\n )\n)\nor\nlabel_replace(\n sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Block execution\"}[$__range]) )\n - sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=~\"Storage read|Account read|Account code read|Block hash read\"}[$__range]) ),\n \"function_name\",\"Block Execution (w/o reads)\",\"__name__\",\".*\"\n)\nor\nlabel_replace(\n sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Execute Block\"}[$__range]) )\n - sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Block execution\"}[$__range]) ),\n \"function_name\",\"Merkleization Diff\",\"__name__\",\".*\"\n)", "fullMetaSearch": false, "includeNullMetadata": false, "legendFormat": "__auto", @@ -1362,7 +1362,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "label_replace(\n (sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", function_name=\"Trie update\"}[$__range]) )\n - sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", function_name=\"Block execution\"}[$__range]) ))\n / sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", function_name=\"Execute Block\"}[$__range]) ),\n \"function_name\",\"Execution vs Merkleization Diff %\",\"__name__\",\".*\"\n)", + "expr": "label_replace(\n (sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Trie update\"}[$__range]) )\n - sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Block execution\"}[$__range]) ))\n / sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Execute Block\"}[$__range]) ),\n \"function_name\",\"Execution vs Merkleization Diff %\",\"__name__\",\".*\"\n)", "fullMetaSearch": false, "includeNullMetadata": false, "legendFormat": "__auto", @@ -1376,7 +1376,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "label_replace(\n 1 - ( sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", function_name=\"Trie update\"}[$__range]) )\n / sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", function_name=\"Execute Block\"}[$__range]) )),\n \"function_name\",\"Merkleization vs Total diff\",\"__name__\",\".*\"\n)", + "expr": "label_replace(\n 1 - ( sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Trie update\"}[$__range]) )\n / sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Execute Block\"}[$__range]) )),\n \"function_name\",\"Merkleization vs Total diff\",\"__name__\",\".*\"\n)", "hide": true, "instant": false, "legendFormat": "__auto", @@ -1517,7 +1517,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "(\n # Calculates the Execute Block first and removes zeroes generated by idelta to smooth the panel\n sum by (instance, function_name) (\n clamp_min(\n idelta(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", function_name=\"Execute Block\"}[$__rate_interval]),\n 0\n )\n )\n) > 0\nor\n(\n# Calculates the rest of the instruments, except Execute Block, which was calculated before\n sum by (instance, function_name) (\n clamp_min(\n idelta(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", function_name!~\"Execute Block|Block execution\"}[$__rate_interval]),\n 0\n )\n )\n)\nor\nlabel_replace(\n sum by (instance) (\n clamp_min(\n idelta(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", function_name=\"Block execution\"}[$__rate_interval]),\n 0\n )\n )\n - sum by (instance) (\n clamp_min(\n idelta(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", function_name=~\"Storage read|Account read|Account code read|Block hash read\"}[$__rate_interval]),\n 0\n )\n ),\n \"function_name\",\"Block Execution (w/o reads)\",\"instance\",\".*\"\n)", + "expr": "(\n # Calculates the Execute Block first and removes zeroes generated by idelta to smooth the panel\n sum by (instance, function_name) (\n clamp_min(\n idelta(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Execute Block\"}[$__rate_interval]),\n 0\n )\n )\n) > 0\nor\n(\n# Calculates the rest of the instruments, except Execute Block, which was calculated before\n sum by (instance, function_name) (\n clamp_min(\n idelta(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name!~\"Execute Block|Block execution\"}[$__rate_interval]),\n 0\n )\n )\n)\nor\nlabel_replace(\n sum by (instance) (\n clamp_min(\n idelta(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Block execution\"}[$__rate_interval]),\n 0\n )\n )\n - sum by (instance) (\n clamp_min(\n idelta(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=~\"Storage read|Account read|Account code read|Block hash read\"}[$__rate_interval]),\n 0\n )\n ),\n \"function_name\",\"Block Execution (w/o reads)\",\"instance\",\".*\"\n)", "hide": false, "instant": false, "legendFormat": "{{instance}} {{function_name}}", From e873b852cffe3b05a2e92e914386ae0c5a8d0ffb Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Mon, 10 Nov 2025 20:17:05 -0300 Subject: [PATCH 04/28] initial rpc dashboards --- .../common_dashboards/ethrex_l1_perf.json | 420 +++++++++++++++++- 1 file changed, 418 insertions(+), 2 deletions(-) diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json index 8b4eeb71ade..d49cc217bac 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json @@ -1601,6 +1601,422 @@ "x": 0, "y": 27 }, + "id": 100, + "panels": [], + "title": "RPC Performance", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Requests/sec", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 101, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (function_name) (rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]))", + "legendFormat": "{{function_name}}", + "range": true, + "refId": "A" + } + ], + "title": "RPC Request Rate by Method", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Duration", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 102, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m])))", + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m])))", + "legendFormat": "p95", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m])))", + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "RPC Latency Percentiles (All Methods)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 36 + }, + "id": 103, + "options": { + "displayLabels": [ + "percent" + ], + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": [ + "value", + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "sum" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (function_name) (increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__range]))", + "legendFormat": "{{function_name}}", + "range": true, + "refId": "A" + } + ], + "title": "RPC Time Breakdown (Total Time per Method)", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 0.1 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Method" + }, + "properties": [ + { + "id": "custom.width", + "value": 300 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 104, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Avg Duration" + } + ] + }, + "pluginVersion": "12.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "topk(15, rate(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]) / rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]))", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Top 15 Slowest RPC Methods (Avg Duration)", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "instance": true, + "job": true, + "namespace": true + }, + "indexByName": {}, + "renameByName": { + "Value": "Avg Duration", + "function_name": "Method" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 44 + }, "id": 48, "panels": [], "title": "Process & Server Info", @@ -2495,7 +2911,7 @@ "refId": "B" } ], - "title": "Host Ram (GiB) — Used vs Total", + "title": "Host Ram (GiB) \u2014 Used vs Total", "type": "timeseries" }, { @@ -2896,4 +3312,4 @@ "title": "Ethrex L1 - Perf Dashboard", "uid": "beoru4vp59yiof", "version": 35 -} +} \ No newline at end of file From cc97128990c0774a795bb1eb5e2ed63e0f7d7ea2 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Mon, 10 Nov 2025 20:27:07 -0300 Subject: [PATCH 05/28] made some organization changes in the dashboard --- .../common_dashboards/ethrex_l1_perf.json | 875 +++++++++--------- 1 file changed, 452 insertions(+), 423 deletions(-) diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json index d49cc217bac..113e5ec8a54 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 3, + "id": 2, "links": [], "panels": [ { @@ -37,6 +37,9 @@ "type": "auto" }, "filterable": true, + "footer": { + "reducers": [] + }, "inspect": false }, "mappings": [], @@ -65,17 +68,9 @@ "id": 13, "options": { "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, "showHeader": true }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "targets": [ { "datasource": { @@ -132,6 +127,9 @@ "type": "auto" }, "filterable": true, + "footer": { + "reducers": [] + }, "inspect": false }, "mappings": [], @@ -161,17 +159,9 @@ "id": 15, "options": { "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, "showHeader": true }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "targets": [ { "datasource": { @@ -230,6 +220,9 @@ "type": "auto" }, "filterable": true, + "footer": { + "reducers": [] + }, "inspect": false }, "mappings": [], @@ -258,17 +251,9 @@ "id": 17, "options": { "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, "showHeader": true }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "targets": [ { "datasource": { @@ -366,6 +351,7 @@ "type": "linear" }, "showPoints": "auto", + "showValues": false, "spanNulls": false, "stacking": { "group": "A", @@ -431,7 +417,7 @@ "sort": "none" } }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "targets": [ { "datasource": { @@ -484,6 +470,7 @@ "type": "linear" }, "showPoints": "auto", + "showValues": false, "spanNulls": false, "stacking": { "group": "A", @@ -551,7 +538,7 @@ "sort": "none" } }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "targets": [ { "datasource": { @@ -619,6 +606,7 @@ "type": "linear" }, "showPoints": "auto", + "showValues": false, "spanNulls": false, "stacking": { "group": "A", @@ -666,7 +654,7 @@ "sort": "none" } }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "targets": [ { "datasource": { @@ -823,7 +811,7 @@ "sort": "none" } }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "targets": [ { "datasource": { @@ -1061,7 +1049,7 @@ "sort": "none" } }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "targets": [ { "datasource": { @@ -1594,7 +1582,7 @@ "type": "row" }, { - "collapsed": false, + "collapsed": true, "gridPos": { "h": 1, "w": 24, @@ -1602,412 +1590,447 @@ "y": 27 }, "id": 100, - "panels": [], - "title": "RPC Performance", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Requests/sec", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - } - ] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 28 - }, - "id": 101, - "options": { - "legend": { - "calcs": [ - "mean", - "max" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "sortBy": "Mean", - "sortDesc": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "12.2.0", - "targets": [ + "panels": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", - "expr": "sum by (function_name) (rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]))", - "legendFormat": "{{function_name}}", - "range": true, - "refId": "A" - } - ], - "title": "RPC Request Rate by Method", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Duration", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Requests/sec", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "reqps" }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 101, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 28 - }, - "id": 102, - "options": { - "legend": { - "calcs": [ - "mean", - "max" + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (function_name) (rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]))", + "legendFormat": "{{function_name}}", + "range": true, + "refId": "A" + } ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "12.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.50, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m])))", - "legendFormat": "p50", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m])))", - "legendFormat": "p95", - "range": true, - "refId": "B" + "title": "RPC Request Rate by Method", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m])))", - "legendFormat": "p99", - "range": true, - "refId": "C" - } - ], - "title": "RPC Latency Percentiles (All Methods)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Duration", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "s" + }, + "overrides": [] }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 102, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" } }, - "mappings": [], - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 36 - }, - "id": 103, - "options": { - "displayLabels": [ - "percent" - ], - "legend": { - "displayMode": "table", - "placement": "right", - "showLegend": true, - "values": [ - "value", - "percent" - ] - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "sum" + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m])))", + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m])))", + "legendFormat": "p95", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m])))", + "legendFormat": "p99", + "range": true, + "refId": "C" + } ], - "fields": "", - "values": false + "title": "RPC Latency Percentiles (All Methods)", + "type": "timeseries" }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "12.2.0", - "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", - "expr": "sum by (function_name) (increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__range]))", - "legendFormat": "{{function_name}}", - "range": true, - "refId": "A" - } - ], - "title": "RPC Time Breakdown (Total Time per Method)", - "type": "piechart" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "left", - "cellOptions": { - "type": "auto" - }, - "filterable": true, - "inspect": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "color": "yellow", - "value": 0.1 + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } }, + "mappings": [], + "unit": "s" + }, + "overrides": [ { - "color": "red", - "value": 1 + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "engine_exchangeCapabilities", + "engine_forkchoiceUpdatedV3", + "engine_getBlobsV2", + "engine_getPayloadBodiesByRangeV1", + "eth_blockNumber", + "eth_chainId", + "eth_gasPrice", + "eth_getBlockByNumber", + "eth_syncing" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": true, + "viz": true + } + } + ] } ] }, - "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Method" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 36 + }, + "id": 103, + "options": { + "displayLabels": [ + "percent" + ], + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": [ + "value", + "percent" + ] }, - "properties": [ - { - "id": "custom.width", - "value": 300 - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 36 - }, - "id": 104, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "sum" + ], + "fields": "", + "values": false + }, + "sort": "desc", + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (function_name) (increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__range]))", + "legendFormat": "{{function_name}}", + "range": true, + "refId": "A" + } ], - "show": false + "title": "RPC Time Breakdown (Total Time per Method)", + "type": "piechart" }, - "showHeader": true, - "sortBy": [ - { - "desc": true, - "displayName": "Avg Duration" - } - ] - }, - "pluginVersion": "12.2.0", - "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "editorMode": "code", - "expr": "topk(15, rate(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]) / rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]))", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "Top 15 Slowest RPC Methods (Avg Duration)", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "instance": true, - "job": true, - "namespace": true + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "footer": { + "reducers": [] + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 0.1 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "s" }, - "indexByName": {}, - "renameByName": { - "Value": "Avg Duration", - "function_name": "Method" + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Method" + }, + "properties": [ + { + "id": "custom.width", + "value": 300 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 104, + "options": { + "cellHeight": "sm", + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Avg Duration" + } + ] + }, + "pluginVersion": "12.2.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "topk(15, rate(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]) / rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]))", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" } - } + ], + "title": "Top 15 Slowest RPC Methods (Avg Duration)", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "instance": true, + "job": true, + "namespace": true + }, + "indexByName": {}, + "renameByName": { + "Value": "Avg Duration", + "function_name": "Method" + } + } + } + ], + "type": "table" } ], - "type": "table" + "title": "RPC Performance", + "type": "row" }, { "collapsed": false, @@ -2015,7 +2038,7 @@ "h": 1, "w": 24, "x": 0, - "y": 44 + "y": 28 }, "id": 48, "panels": [], @@ -2054,7 +2077,7 @@ "h": 3, "w": 5, "x": 0, - "y": 28 + "y": 29 }, "id": 34, "options": { @@ -2074,7 +2097,7 @@ "textMode": "value", "wideLayout": true }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "targets": [ { "datasource": { @@ -2123,7 +2146,7 @@ "h": 3, "w": 2, "x": 5, - "y": 28 + "y": 29 }, "id": 33, "options": { @@ -2143,7 +2166,7 @@ "textMode": "value", "wideLayout": true }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "targets": [ { "datasource": { @@ -2191,7 +2214,7 @@ "h": 3, "w": 5, "x": 7, - "y": 28 + "y": 29 }, "id": 43, "options": { @@ -2211,7 +2234,7 @@ "textMode": "value", "wideLayout": true }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "targets": [ { "datasource": { @@ -2263,6 +2286,7 @@ "type": "linear" }, "showPoints": "auto", + "showValues": false, "spanNulls": false, "stacking": { "group": "A", @@ -2294,7 +2318,7 @@ "h": 8, "w": 12, "x": 12, - "y": 28 + "y": 29 }, "id": 46, "options": { @@ -2314,7 +2338,7 @@ "sort": "none" } }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "targets": [ { "disableTextWrap": false, @@ -2366,6 +2390,7 @@ "type": "linear" }, "showPoints": "auto", + "showValues": false, "spanNulls": false, "stacking": { "group": "A", @@ -2400,7 +2425,7 @@ "h": 5, "w": 12, "x": 0, - "y": 31 + "y": 32 }, "id": 32, "options": { @@ -2419,7 +2444,7 @@ "sort": "none" } }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "targets": [ { "datasource": { @@ -2471,6 +2496,7 @@ "type": "linear" }, "showPoints": "auto", + "showValues": false, "spanNulls": false, "stacking": { "group": "A", @@ -2502,7 +2528,7 @@ "h": 6, "w": 12, "x": 0, - "y": 36 + "y": 37 }, "id": 30, "options": { @@ -2521,7 +2547,7 @@ "sort": "none" } }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "targets": [ { "datasource": { @@ -2573,6 +2599,7 @@ "type": "linear" }, "showPoints": "auto", + "showValues": false, "spanNulls": false, "stacking": { "group": "A", @@ -2629,7 +2656,7 @@ "h": 6, "w": 12, "x": 12, - "y": 36 + "y": 37 }, "id": 31, "options": { @@ -2648,7 +2675,7 @@ "sort": "none" } }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "targets": [ { "datasource": { @@ -2700,6 +2727,7 @@ "type": "linear" }, "showPoints": "auto", + "showValues": false, "spanNulls": true, "stacking": { "group": "A", @@ -2732,7 +2760,7 @@ "h": 8, "w": 12, "x": 0, - "y": 42 + "y": 43 }, "id": 45, "options": { @@ -2751,7 +2779,7 @@ "sort": "none" } }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "targets": [ { "editorMode": "code", @@ -2799,6 +2827,7 @@ "type": "linear" }, "showPoints": "auto", + "showValues": false, "spanNulls": true, "stacking": { "group": "A", @@ -2870,7 +2899,7 @@ "h": 8, "w": 12, "x": 12, - "y": 42 + "y": 43 }, "id": 42, "options": { @@ -2890,7 +2919,7 @@ "sort": "none" } }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "targets": [ { "editorMode": "code", @@ -2911,7 +2940,7 @@ "refId": "B" } ], - "title": "Host Ram (GiB) \u2014 Used vs Total", + "title": "Host Ram (GiB) — Used vs Total", "type": "timeseries" }, { @@ -2920,7 +2949,7 @@ "h": 1, "w": 24, "x": 0, - "y": 50 + "y": 51 }, "id": 55, "panels": [ @@ -2990,7 +3019,7 @@ "h": 8, "w": 6, "x": 0, - "y": 1061 + "y": 1107 }, "id": 56, "options": { @@ -3089,7 +3118,7 @@ "h": 8, "w": 9, "x": 6, - "y": 1061 + "y": 1107 }, "id": 57, "options": { @@ -3189,7 +3218,7 @@ "h": 8, "w": 9, "x": 15, - "y": 1061 + "y": 1107 }, "id": 58, "options": { @@ -3229,7 +3258,7 @@ ], "preload": false, "refresh": "5s", - "schemaVersion": 41, + "schemaVersion": 42, "tags": [], "templating": { "list": [ @@ -3311,5 +3340,5 @@ "timezone": "utc", "title": "Ethrex L1 - Perf Dashboard", "uid": "beoru4vp59yiof", - "version": 35 -} \ No newline at end of file + "version": 36 +} From 14dbeaf4e18d6050bf18200aa5271d14c2efd1ff Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Mon, 10 Nov 2025 20:34:52 -0300 Subject: [PATCH 06/28] make rpc dashboard repeatable by instance --- .../common_dashboards/ethrex_l1_perf.json | 434 +++++++++--------- 1 file changed, 205 insertions(+), 229 deletions(-) diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json index 113e5ec8a54..2420381afad 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json @@ -1239,7 +1239,7 @@ "sort": "none" } }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "repeat": "instance", "repeatDirection": "v", "targets": [ @@ -1293,6 +1293,7 @@ "type": "linear" }, "showPoints": "auto", + "showValues": false, "spanNulls": false, "stacking": { "group": "A", @@ -1343,7 +1344,7 @@ "sort": "none" } }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "repeat": "instance", "repeatDirection": "v", "targets": [ @@ -1495,7 +1496,7 @@ "sort": "none" } }, - "pluginVersion": "12.2.0", + "pluginVersion": "12.2.1", "repeat": "instance", "repeatDirection": "v", "targets": [ @@ -1591,6 +1592,194 @@ }, "id": 100, "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 7, + "x": 0, + "y": 28 + }, + "id": 103, + "options": { + "displayLabels": [ + "percent" + ], + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": [ + "value", + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "sum" + ], + "fields": "", + "values": false + }, + "sort": "desc", + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.1", + "repeat": "instance", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (function_name) (increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__range]))", + "legendFormat": "{{function_name}}", + "range": true, + "refId": "A" + } + ], + "title": "RPC Time Breakdown (Total Time per Method)", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "footer": { + "reducers": [] + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 0.1 + }, + { + "color": "red", + "value": 1 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Method" + }, + "properties": [ + { + "id": "custom.width", + "value": 300 + } + ] + } + ] + }, + "gridPos": { + "h": 14, + "w": 5, + "x": 7, + "y": 28 + }, + "id": 104, + "options": { + "cellHeight": "sm", + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Avg Duration" + } + ] + }, + "pluginVersion": "12.2.1", + "repeat": "instance", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "topk(15, rate(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]) / rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]))", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Top 15 Slowest RPC Methods (Avg Duration)", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "instance": true, + "job": true, + "namespace": true + }, + "indexByName": {}, + "renameByName": { + "Value": "Avg Duration", + "function_name": "Method" + } + } + } + ], + "type": "table" + }, { "datasource": { "type": "prometheus", @@ -1650,9 +1839,9 @@ "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, + "h": 14, + "w": 6, + "x": 12, "y": 28 }, "id": 101, @@ -1675,6 +1864,8 @@ } }, "pluginVersion": "12.2.1", + "repeat": "instance", + "repeatDirection": "v", "targets": [ { "datasource": { @@ -1750,9 +1941,9 @@ "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, + "h": 14, + "w": 6, + "x": 18, "y": 28 }, "id": 102, @@ -1773,6 +1964,8 @@ } }, "pluginVersion": "12.2.1", + "repeat": "instance", + "repeatDirection": "v", "targets": [ { "datasource": { @@ -1810,223 +2003,6 @@ ], "title": "RPC Latency Percentiles (All Methods)", "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [], - "unit": "s" - }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "engine_exchangeCapabilities", - "engine_forkchoiceUpdatedV3", - "engine_getBlobsV2", - "engine_getPayloadBodiesByRangeV1", - "eth_blockNumber", - "eth_chainId", - "eth_gasPrice", - "eth_getBlockByNumber", - "eth_syncing" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": true, - "viz": true - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 36 - }, - "id": 103, - "options": { - "displayLabels": [ - "percent" - ], - "legend": { - "displayMode": "table", - "placement": "right", - "showLegend": true, - "values": [ - "value", - "percent" - ] - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "sum" - ], - "fields": "", - "values": false - }, - "sort": "desc", - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "desc" - } - }, - "pluginVersion": "12.2.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum by (function_name) (increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__range]))", - "legendFormat": "{{function_name}}", - "range": true, - "refId": "A" - } - ], - "title": "RPC Time Breakdown (Total Time per Method)", - "type": "piechart" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "left", - "cellOptions": { - "type": "auto" - }, - "filterable": true, - "footer": { - "reducers": [] - }, - "inspect": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - }, - { - "color": "yellow", - "value": 0.1 - }, - { - "color": "red", - "value": 1 - } - ] - }, - "unit": "s" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Method" - }, - "properties": [ - { - "id": "custom.width", - "value": 300 - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 36 - }, - "id": 104, - "options": { - "cellHeight": "sm", - "showHeader": true, - "sortBy": [ - { - "desc": true, - "displayName": "Avg Duration" - } - ] - }, - "pluginVersion": "12.2.1", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "topk(15, rate(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]) / rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]))", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "Top 15 Slowest RPC Methods (Avg Duration)", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "instance": true, - "job": true, - "namespace": true - }, - "indexByName": {}, - "renameByName": { - "Value": "Avg Duration", - "function_name": "Method" - } - } - } - ], - "type": "table" } ], "title": "RPC Performance", @@ -3019,7 +2995,7 @@ "h": 8, "w": 6, "x": 0, - "y": 1107 + "y": 1150 }, "id": 56, "options": { @@ -3118,7 +3094,7 @@ "h": 8, "w": 9, "x": 6, - "y": 1107 + "y": 1150 }, "id": 57, "options": { @@ -3218,7 +3194,7 @@ "h": 8, "w": 9, "x": 15, - "y": 1107 + "y": 1150 }, "id": 58, "options": { From 6dc47b9dd6cacbeb98bded08119d403de6f92822 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Tue, 11 Nov 2025 15:19:29 -0300 Subject: [PATCH 07/28] separate Engine API from RPC API in instrumentation --- RPC_INSTRUMENTATION_APPLIED.md | 94 ++++ crates/blockchain/metrics/profiling.rs | 72 +-- crates/networking/rpc/rpc.rs | 120 +++-- .../common_dashboards/ethrex_l1_perf.json | 453 +++++++++++++++++- 4 files changed, 639 insertions(+), 100 deletions(-) create mode 100644 RPC_INSTRUMENTATION_APPLIED.md diff --git a/RPC_INSTRUMENTATION_APPLIED.md b/RPC_INSTRUMENTATION_APPLIED.md new file mode 100644 index 00000000000..269653f0ac9 --- /dev/null +++ b/RPC_INSTRUMENTATION_APPLIED.md @@ -0,0 +1,94 @@ +# RPC Instrumentation - Middleware Approach with Namespace Separation + +## What Was Done + +Added per-method profiling for RPC and Engine API calls using a middleware pattern with explicit namespace separation. + +## Changes + +### 1. `crates/blockchain/metrics/profiling.rs` +- Added `namespace` label to histogram: `&["namespace", "function_name"]` +- Uses wrapper structs `MethodName` and `Namespace` to disambiguate span extensions +- Extracts both `namespace` and `method` fields from spans +- Falls back to module path-based detection if namespace not explicitly set +- Supports explicit namespace override via span fields + +### 2. `crates/networking/rpc/rpc.rs` +- Updated middleware function to accept namespace parameter: +```rust +async fn instrumented_call( + namespace: &str, + method: &str, + req: &RpcRequest, + context: RpcApiContext +) -> Result +{ + let span = tracing::trace_span!("rpc_call", namespace = %namespace, method = %method); + let _enter = span.enter(); + T::call(req, context).await +} +``` +- Applied to **60+ RPC methods** with namespace separation: + - **35 eth_\* methods** → `namespace="rpc"` + - **7 debug_\* methods** → `namespace="rpc"` + - **18 engine_\* methods** → `namespace="engine"` + +### 3. `metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json` +- Added **Engine API Performance** row (id: 110) with 4 panels: + - Engine API Time Breakdown (piechart) + - Top 15 Slowest Engine API Methods (table with 4s threshold) + - Engine API Request Rate by Method (timeseries) + - Engine API Latency Percentiles (timeseries with 4s warning line) +- Updated existing **RPC Performance** row (id: 100) with 4 panels: + - RPC Time Breakdown (piechart) + - Top 15 Slowest RPC Methods (table) + - RPC Request Rate by Method (timeseries) + - RPC Latency Percentiles (timeseries) +- Updated **Block Execution Breakdown** panels to filter out RPC/Engine metrics using `namespace!="rpc"` and `namespace!="engine"` + +## Metrics Generated + +```promql +# RPC API metrics (eth_*, debug_*) +function_duration_seconds{namespace="rpc", function_name="eth_blockNumber"} +function_duration_seconds{namespace="rpc", function_name="debug_traceTransaction"} + +# Engine API metrics (engine_*) +function_duration_seconds{namespace="engine", function_name="engine_newPayloadV4"} +function_duration_seconds{namespace="engine", function_name="engine_forkchoiceUpdatedV3"} + +# Block execution metrics (unchanged) +function_duration_seconds{namespace="block_processing", function_name="Execute Block"} +``` + +## Namespace Separation Rationale + +**Engine API** is separated from general **RPC API** because: +- Engine API is **critical for validators** - directly interfaces with consensus clients +- **Payload build time** must be < 4s for block proposals (beacon chain slot timing) +- `newPayload`, `forkchoiceUpdated`, and `getPayload` have strict performance requirements +- Different SLA requirements: Engine API failures impact network consensus, RPC API failures impact user experience +- Enables targeted monitoring and alerting for validator operations + +## Grafana Queries + +```promql +# RPC only +rate(function_duration_seconds_sum{namespace="rpc"}[5m]) + +# RPC latency percentiles +histogram_quantile(0.99, rate(function_duration_seconds_bucket{namespace="rpc"}[5m])) + +# Compare namespaces +sum by (namespace) (rate(function_duration_seconds_sum[5m])) +``` + +## Testing + +```bash +# Start with metrics +cargo run --release -- --metrics-enabled + +# Check metrics +curl http://localhost:6060/metrics | grep 'namespace="rpc"' +``` diff --git a/crates/blockchain/metrics/profiling.rs b/crates/blockchain/metrics/profiling.rs index 89950446912..a28a8025693 100644 --- a/crates/blockchain/metrics/profiling.rs +++ b/crates/blockchain/metrics/profiling.rs @@ -25,22 +25,34 @@ pub struct FunctionProfilingLayer; /// Wrapper around [`HistogramTimer`] to avoid conflicts with other layers struct ProfileTimer(HistogramTimer); -/// Visitor to extract the 'method' field from RPC spans for more granular profiling +/// Wrapper to store method name in span extensions +struct MethodName(String); + +/// Wrapper to store namespace in span extensions +struct Namespace(String); + +/// Visitor to extract 'method' and 'namespace' fields from RPC spans for more granular profiling #[derive(Default)] -struct MethodVisitor { +struct SpanFieldVisitor { method: Option, + namespace: Option, } -impl Visit for MethodVisitor { +impl Visit for SpanFieldVisitor { fn record_str(&mut self, field: &Field, value: &str) { - if field.name() == "method" { - self.method = Some(value.to_string()); + match field.name() { + "method" => self.method = Some(value.to_string()), + "namespace" => self.namespace = Some(value.to_string()), + _ => {} } } fn record_debug(&mut self, field: &Field, value: &dyn std::fmt::Debug) { - if field.name() == "method" { - self.method = Some(format!("{:?}", value).trim_matches('"').to_string()); + let value_str = format!("{:?}", value).trim_matches('"').to_string(); + match field.name() { + "method" => self.method = Some(value_str), + "namespace" => self.namespace = Some(value_str), + _ => {} } } } @@ -50,14 +62,17 @@ where S: Subscriber + for<'a> LookupSpan<'a>, { fn on_new_span(&self, attrs: &Attributes<'_>, id: &Id, ctx: Context<'_, S>) { - // Extract the 'method' field if present (used by RPC instrumentation) - let mut visitor = MethodVisitor::default(); + // Extract 'method' and 'namespace' fields if present (used by RPC instrumentation) + let mut visitor = SpanFieldVisitor::default(); attrs.record(&mut visitor); - if let Some(method_name) = visitor.method { - // Store the method name in span extensions for later use - if let Some(span) = ctx.span(id) { - span.extensions_mut().insert(method_name); + if let Some(span) = ctx.span(id) { + let mut extensions = span.extensions_mut(); + if let Some(method_name) = visitor.method { + extensions.insert(MethodName(method_name)); + } + if let Some(namespace) = visitor.namespace { + extensions.insert(Namespace(namespace)); } } } @@ -67,33 +82,34 @@ where && span.metadata().target().starts_with("ethrex") { let target = span.metadata().target(); + let extensions = span.extensions(); - // Determine namespace based on the module target - // ethrex_networking::rpc -> "rpc" - // ethrex_blockchain -> "block_processing" - // ethrex_storage -> "storage" - let namespace = if target.contains("::rpc") { - "rpc" + // First, check if namespace was explicitly set in span (from RPC middleware) + // Otherwise, determine namespace based on the module target + let namespace = if let Some(ns) = extensions.get::() { + ns.0.clone() + } else if target.contains("::rpc") { + "rpc".to_string() } else if target.contains("blockchain") { - "block_processing" + "block_processing".to_string() } else if target.contains("storage") { - "storage" + "storage".to_string() } else if target.contains("networking") { - "networking" + "networking".to_string() } else { - "other" + "other".to_string() }; // Check if we have a stored method name (from RPC middleware) // Otherwise fall back to the span name - let function_name = span - .extensions() - .get::() - .map(|s| s.clone()) + let function_name = extensions + .get::() + .map(|m| m.0.clone()) .unwrap_or_else(|| span.metadata().name().to_string()); + drop(extensions); let timer = METRICS_BLOCK_PROCESSING_PROFILE - .with_label_values(&[namespace, &function_name]) + .with_label_values(&[&namespace, &function_name]) .start_timer(); // PERF: `extensions_mut` uses a Mutex internally (per span) span.extensions_mut().insert(ProfileTimer(timer)); diff --git a/crates/networking/rpc/rpc.rs b/crates/networking/rpc/rpc.rs index 295ccc2b52f..05fe87771ae 100644 --- a/crates/networking/rpc/rpc.rs +++ b/crates/networking/rpc/rpc.rs @@ -198,21 +198,19 @@ pub trait RpcHandler: Sized { } /// Middleware wrapper that instruments RPC method calls with tracing spans for profiling. -/// The profiling layer will automatically detect this is in the RPC namespace and set -/// the appropriate metrics labels: namespace="rpc", function_name= +/// The profiling layer will use the provided namespace and method name for metrics labels. #[inline] async fn instrumented_call( + namespace: &str, method: &str, req: &RpcRequest, context: RpcApiContext, ) -> Result { - // Create a span with the RPC method name - // The profiling layer will automatically: - // 1. Detect this is in ethrex_networking::rpc - // 2. Set namespace="rpc" - // 3. Set function_name= - // 4. Track timing on span exit - let span = tracing::trace_span!("rpc_call", method = %method); + // Create a span with namespace and method name + // The profiling layer will extract these fields and use them for metrics: + // namespace = "rpc" | "engine" + // function_name = method name (e.g., "eth_blockNumber", "engine_newPayloadV4") + let span = tracing::trace_span!("rpc_call", namespace = %namespace, method = %method); let _enter = span.enter(); T::call(req, context).await @@ -477,36 +475,36 @@ pub async fn map_authrpc_requests( pub async fn map_eth_requests(req: &RpcRequest, context: RpcApiContext) -> Result { match req.method.as_str() { - "eth_chainId" => instrumented_call::("eth_chainId", req, context).await, - "eth_syncing" => instrumented_call::("eth_syncing", req, context).await, - "eth_getBlockByNumber" => instrumented_call::("eth_getBlockByNumber", req, context).await, - "eth_getBlockByHash" => instrumented_call::("eth_getBlockByHash", req, context).await, - "eth_getBalance" => instrumented_call::("eth_getBalance", req, context).await, - "eth_getCode" => instrumented_call::("eth_getCode", req, context).await, - "eth_getStorageAt" => instrumented_call::("eth_getStorageAt", req, context).await, + "eth_chainId" => instrumented_call::("rpc", "eth_chainId", req, context).await, + "eth_syncing" => instrumented_call::("rpc", "eth_syncing", req, context).await, + "eth_getBlockByNumber" => instrumented_call::("rpc", "eth_getBlockByNumber", req, context).await, + "eth_getBlockByHash" => instrumented_call::("rpc", "eth_getBlockByHash", req, context).await, + "eth_getBalance" => instrumented_call::("rpc", "eth_getBalance", req, context).await, + "eth_getCode" => instrumented_call::("rpc", "eth_getCode", req, context).await, + "eth_getStorageAt" => instrumented_call::("rpc", "eth_getStorageAt", req, context).await, "eth_getBlockTransactionCountByNumber" => { - instrumented_call::("eth_getBlockTransactionCountByNumber", req, context).await + instrumented_call::("rpc", "eth_getBlockTransactionCountByNumber", req, context).await } "eth_getBlockTransactionCountByHash" => { - instrumented_call::("eth_getBlockTransactionCountByHash", req, context).await + instrumented_call::("rpc", "eth_getBlockTransactionCountByHash", req, context).await } "eth_getTransactionByBlockNumberAndIndex" => { - instrumented_call::("eth_getTransactionByBlockNumberAndIndex", req, context).await + instrumented_call::("rpc", "eth_getTransactionByBlockNumberAndIndex", req, context).await } "eth_getTransactionByBlockHashAndIndex" => { - instrumented_call::("eth_getTransactionByBlockHashAndIndex", req, context).await + instrumented_call::("rpc", "eth_getTransactionByBlockHashAndIndex", req, context).await } - "eth_getBlockReceipts" => instrumented_call::("eth_getBlockReceipts", req, context).await, - "eth_getTransactionByHash" => instrumented_call::("eth_getTransactionByHash", req, context).await, - "eth_getTransactionReceipt" => instrumented_call::("eth_getTransactionReceipt", req, context).await, - "eth_createAccessList" => instrumented_call::("eth_createAccessList", req, context).await, - "eth_blockNumber" => instrumented_call::("eth_blockNumber", req, context).await, - "eth_call" => instrumented_call::("eth_call", req, context).await, - "eth_blobBaseFee" => instrumented_call::("eth_blobBaseFee", req, context).await, - "eth_getTransactionCount" => instrumented_call::("eth_getTransactionCount", req, context).await, - "eth_feeHistory" => instrumented_call::("eth_feeHistory", req, context).await, - "eth_estimateGas" => instrumented_call::("eth_estimateGas", req, context).await, - "eth_getLogs" => instrumented_call::("eth_getLogs", req, context).await, + "eth_getBlockReceipts" => instrumented_call::("rpc", "eth_getBlockReceipts", req, context).await, + "eth_getTransactionByHash" => instrumented_call::("rpc", "eth_getTransactionByHash", req, context).await, + "eth_getTransactionReceipt" => instrumented_call::("rpc", "eth_getTransactionReceipt", req, context).await, + "eth_createAccessList" => instrumented_call::("rpc", "eth_createAccessList", req, context).await, + "eth_blockNumber" => instrumented_call::("rpc", "eth_blockNumber", req, context).await, + "eth_call" => instrumented_call::("rpc", "eth_call", req, context).await, + "eth_blobBaseFee" => instrumented_call::("rpc", "eth_blobBaseFee", req, context).await, + "eth_getTransactionCount" => instrumented_call::("rpc", "eth_getTransactionCount", req, context).await, + "eth_feeHistory" => instrumented_call::("rpc", "eth_feeHistory", req, context).await, + "eth_estimateGas" => instrumented_call::("rpc", "eth_estimateGas", req, context).await, + "eth_getLogs" => instrumented_call::("rpc", "eth_getLogs", req, context).await, "eth_newFilter" => { NewFilterRequest::stateful_call(req, context.storage, context.active_filters).await } @@ -516,26 +514,26 @@ pub async fn map_eth_requests(req: &RpcRequest, context: RpcApiContext) -> Resul "eth_getFilterChanges" => { FilterChangesRequest::stateful_call(req, context.storage, context.active_filters).await } - "eth_sendRawTransaction" => instrumented_call::("eth_sendRawTransaction", req, context).await, - "eth_getProof" => instrumented_call::("eth_getProof", req, context).await, - "eth_gasPrice" => instrumented_call::("eth_gasPrice", req, context).await, + "eth_sendRawTransaction" => instrumented_call::("rpc", "eth_sendRawTransaction", req, context).await, + "eth_getProof" => instrumented_call::("rpc", "eth_getProof", req, context).await, + "eth_gasPrice" => instrumented_call::("rpc", "eth_gasPrice", req, context).await, "eth_maxPriorityFeePerGas" => { - instrumented_call::("eth_maxPriorityFeePerGas", req, context).await + instrumented_call::("rpc", "eth_maxPriorityFeePerGas", req, context).await } - "eth_config" => instrumented_call::("eth_config", req, context).await, + "eth_config" => instrumented_call::("rpc", "eth_config", req, context).await, unknown_eth_method => Err(RpcErr::MethodNotFound(unknown_eth_method.to_owned())), } } pub async fn map_debug_requests(req: &RpcRequest, context: RpcApiContext) -> Result { match req.method.as_str() { - "debug_getRawHeader" => instrumented_call::("debug_getRawHeader", req, context).await, - "debug_getRawBlock" => instrumented_call::("debug_getRawBlock", req, context).await, - "debug_getRawTransaction" => instrumented_call::("debug_getRawTransaction", req, context).await, - "debug_getRawReceipts" => instrumented_call::("debug_getRawReceipts", req, context).await, - "debug_executionWitness" => instrumented_call::("debug_executionWitness", req, context).await, - "debug_traceTransaction" => instrumented_call::("debug_traceTransaction", req, context).await, - "debug_traceBlockByNumber" => instrumented_call::("debug_traceBlockByNumber", req, context).await, + "debug_getRawHeader" => instrumented_call::("rpc", "debug_getRawHeader", req, context).await, + "debug_getRawBlock" => instrumented_call::("rpc", "debug_getRawBlock", req, context).await, + "debug_getRawTransaction" => instrumented_call::("rpc", "debug_getRawTransaction", req, context).await, + "debug_getRawReceipts" => instrumented_call::("rpc", "debug_getRawReceipts", req, context).await, + "debug_executionWitness" => instrumented_call::("rpc", "debug_executionWitness", req, context).await, + "debug_traceTransaction" => instrumented_call::("rpc", "debug_traceTransaction", req, context).await, + "debug_traceBlockByNumber" => instrumented_call::("rpc", "debug_traceBlockByNumber", req, context).await, unknown_debug_method => Err(RpcErr::MethodNotFound(unknown_debug_method.to_owned())), } } @@ -545,30 +543,30 @@ pub async fn map_engine_requests( context: RpcApiContext, ) -> Result { match req.method.as_str() { - "engine_exchangeCapabilities" => instrumented_call::("engine_exchangeCapabilities", req, context).await, - "engine_forkchoiceUpdatedV1" => instrumented_call::("engine_forkchoiceUpdatedV1", req, context).await, - "engine_forkchoiceUpdatedV2" => instrumented_call::("engine_forkchoiceUpdatedV2", req, context).await, - "engine_forkchoiceUpdatedV3" => instrumented_call::("engine_forkchoiceUpdatedV3", req, context).await, - "engine_newPayloadV4" => instrumented_call::("engine_newPayloadV4", req, context).await, - "engine_newPayloadV3" => instrumented_call::("engine_newPayloadV3", req, context).await, - "engine_newPayloadV2" => instrumented_call::("engine_newPayloadV2", req, context).await, - "engine_newPayloadV1" => instrumented_call::("engine_newPayloadV1", req, context).await, + "engine_exchangeCapabilities" => instrumented_call::("engine", "engine_exchangeCapabilities", req, context).await, + "engine_forkchoiceUpdatedV1" => instrumented_call::("engine", "engine_forkchoiceUpdatedV1", req, context).await, + "engine_forkchoiceUpdatedV2" => instrumented_call::("engine", "engine_forkchoiceUpdatedV2", req, context).await, + "engine_forkchoiceUpdatedV3" => instrumented_call::("engine", "engine_forkchoiceUpdatedV3", req, context).await, + "engine_newPayloadV4" => instrumented_call::("engine", "engine_newPayloadV4", req, context).await, + "engine_newPayloadV3" => instrumented_call::("engine", "engine_newPayloadV3", req, context).await, + "engine_newPayloadV2" => instrumented_call::("engine", "engine_newPayloadV2", req, context).await, + "engine_newPayloadV1" => instrumented_call::("engine", "engine_newPayloadV1", req, context).await, "engine_exchangeTransitionConfigurationV1" => { - instrumented_call::("engine_exchangeTransitionConfigurationV1", req, context).await + instrumented_call::("engine", "engine_exchangeTransitionConfigurationV1", req, context).await } - "engine_getPayloadV5" => instrumented_call::("engine_getPayloadV5", req, context).await, - "engine_getPayloadV4" => instrumented_call::("engine_getPayloadV4", req, context).await, - "engine_getPayloadV3" => instrumented_call::("engine_getPayloadV3", req, context).await, - "engine_getPayloadV2" => instrumented_call::("engine_getPayloadV2", req, context).await, - "engine_getPayloadV1" => instrumented_call::("engine_getPayloadV1", req, context).await, + "engine_getPayloadV5" => instrumented_call::("engine", "engine_getPayloadV5", req, context).await, + "engine_getPayloadV4" => instrumented_call::("engine", "engine_getPayloadV4", req, context).await, + "engine_getPayloadV3" => instrumented_call::("engine", "engine_getPayloadV3", req, context).await, + "engine_getPayloadV2" => instrumented_call::("engine", "engine_getPayloadV2", req, context).await, + "engine_getPayloadV1" => instrumented_call::("engine", "engine_getPayloadV1", req, context).await, "engine_getPayloadBodiesByHashV1" => { - instrumented_call::("engine_getPayloadBodiesByHashV1", req, context).await + instrumented_call::("engine", "engine_getPayloadBodiesByHashV1", req, context).await } "engine_getPayloadBodiesByRangeV1" => { - instrumented_call::("engine_getPayloadBodiesByRangeV1", req, context).await + instrumented_call::("engine", "engine_getPayloadBodiesByRangeV1", req, context).await } - "engine_getBlobsV1" => instrumented_call::("engine_getBlobsV1", req, context).await, - "engine_getBlobsV2" => instrumented_call::("engine_getBlobsV2", req, context).await, + "engine_getBlobsV1" => instrumented_call::("engine", "engine_getBlobsV1", req, context).await, + "engine_getBlobsV2" => instrumented_call::("engine", "engine_getBlobsV2", req, context).await, unknown_engine_method => Err(RpcErr::MethodNotFound(unknown_engine_method.to_owned())), } } diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json index 2420381afad..20cbcd59e96 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json @@ -1590,6 +1590,437 @@ "x": 0, "y": 27 }, + "id": 110, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 7, + "x": 0, + "y": 28 + }, + "id": 113, + "options": { + "displayLabels": [ + "percent" + ], + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": [ + "value", + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "sum" + ], + "fields": "", + "values": false + }, + "sort": "desc", + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.1", + "repeat": "instance", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (function_name) (increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[$__range]))", + "legendFormat": "{{function_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Engine API Time Breakdown (Total Time per Method)", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "footer": { + "reducers": [] + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "red", + "value": 4 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Method" + }, + "properties": [ + { + "id": "custom.width", + "value": 350 + } + ] + } + ] + }, + "gridPos": { + "h": 14, + "w": 5, + "x": 7, + "y": 28 + }, + "id": 114, + "options": { + "cellHeight": "sm", + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Avg Duration" + } + ] + }, + "pluginVersion": "12.2.1", + "repeat": "instance", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "topk(15, rate(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[5m]) / rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[5m]))", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Top 15 Slowest Engine API Methods (Avg Duration)", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "instance": true, + "job": true, + "namespace": true + }, + "indexByName": {}, + "renameByName": { + "Value": "Avg Duration", + "function_name": "Method" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Requests/sec", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 6, + "x": 12, + "y": 28 + }, + "id": 111, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.1", + "repeat": "instance", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (function_name) (rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[5m]))", + "legendFormat": "{{function_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Engine API Request Rate by Method", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Engine API latency percentiles - critical for validators. p99 should be well under 4s for block building.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Duration", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 4 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 6, + "x": 18, + "y": 28 + }, + "id": 112, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.1", + "repeat": "instance", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[5m])))", + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[5m])))", + "legendFormat": "p95", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[5m])))", + "legendFormat": "p99", + "range": true, + "refId": "C" + } + ], + "title": "Engine API Latency Percentiles (All Methods)", + "type": "timeseries" + } + ], + "title": "Engine API Performance", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 28 + }, "id": 100, "panels": [ { @@ -2014,7 +2445,7 @@ "h": 1, "w": 24, "x": 0, - "y": 28 + "y": 29 }, "id": 48, "panels": [], @@ -2053,7 +2484,7 @@ "h": 3, "w": 5, "x": 0, - "y": 29 + "y": 30 }, "id": 34, "options": { @@ -2122,7 +2553,7 @@ "h": 3, "w": 2, "x": 5, - "y": 29 + "y": 30 }, "id": 33, "options": { @@ -2190,7 +2621,7 @@ "h": 3, "w": 5, "x": 7, - "y": 29 + "y": 30 }, "id": 43, "options": { @@ -2294,7 +2725,7 @@ "h": 8, "w": 12, "x": 12, - "y": 29 + "y": 30 }, "id": 46, "options": { @@ -2401,7 +2832,7 @@ "h": 5, "w": 12, "x": 0, - "y": 32 + "y": 33 }, "id": 32, "options": { @@ -2504,7 +2935,7 @@ "h": 6, "w": 12, "x": 0, - "y": 37 + "y": 38 }, "id": 30, "options": { @@ -2632,7 +3063,7 @@ "h": 6, "w": 12, "x": 12, - "y": 37 + "y": 38 }, "id": 31, "options": { @@ -2736,7 +3167,7 @@ "h": 8, "w": 12, "x": 0, - "y": 43 + "y": 44 }, "id": 45, "options": { @@ -2875,7 +3306,7 @@ "h": 8, "w": 12, "x": 12, - "y": 43 + "y": 44 }, "id": 42, "options": { @@ -2925,7 +3356,7 @@ "h": 1, "w": 24, "x": 0, - "y": 51 + "y": 52 }, "id": 55, "panels": [ From d0885bb5f70291799b2d8ad43024ed25556d4b3d Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Tue, 11 Nov 2025 16:57:51 -0300 Subject: [PATCH 08/28] change total sum by average in pie charts of rpc --- .../dashboards/common_dashboards/ethrex_l1_perf.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json index 20cbcd59e96..822746f4b6b 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json @@ -1659,13 +1659,13 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (function_name) (increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[$__range]))", + "expr": "sum by (function_name) (increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[$__range])) / sum by (function_name) (increase(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[$__range]))", "legendFormat": "{{function_name}}", "range": true, "refId": "A" } ], - "title": "Engine API Time Breakdown (Total Time per Method)", + "title": "Engine API Average Time per Method", "type": "piechart" }, { @@ -2090,13 +2090,13 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (function_name) (increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__range]))", + "expr": "sum by (function_name) (increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__range])) / sum by (function_name) (increase(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__range]))", "legendFormat": "{{function_name}}", "range": true, "refId": "A" } ], - "title": "RPC Time Breakdown (Total Time per Method)", + "title": "RPC Average Time per Method", "type": "piechart" }, { From 6f82153cc86388d611dd4ac5bc6aff0118e10043 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Tue, 11 Nov 2025 17:03:21 -0300 Subject: [PATCH 09/28] move from increase to rate for calculating the avg --- .../grafana/dashboards/common_dashboards/ethrex_l1_perf.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json index 822746f4b6b..160171667fd 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json @@ -1659,7 +1659,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (function_name) (increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[$__range])) / sum by (function_name) (increase(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[$__range]))", + "expr": "(sum by (function_name) (rate(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[$__rate_interval])) / sum by (function_name) (rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[$__rate_interval])) > 0) or (sum by (function_name) (function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}) / sum by (function_name) (function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}))", "legendFormat": "{{function_name}}", "range": true, "refId": "A" @@ -2090,7 +2090,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (function_name) (increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__range])) / sum by (function_name) (increase(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__range]))", + "expr": "(sum by (function_name) (rate(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__rate_interval])) / sum by (function_name) (rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__rate_interval])) > 0) or (sum by (function_name) (function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}) / sum by (function_name) (function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}))", "legendFormat": "{{function_name}}", "range": true, "refId": "A" From 1b6cefaa724cf3bbd8c8f3419483c6240f93b4c2 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Tue, 11 Nov 2025 17:07:00 -0300 Subject: [PATCH 10/28] go without any increase or rate --- .../dashboards/common_dashboards/ethrex_l1_perf.json | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json index 160171667fd..a6283b58d22 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json @@ -1659,9 +1659,10 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "(sum by (function_name) (rate(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[$__rate_interval])) / sum by (function_name) (rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[$__rate_interval])) > 0) or (sum by (function_name) (function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}) / sum by (function_name) (function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}))", + "expr": "sum by (function_name) (function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}) / sum by (function_name) (function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"})", "legendFormat": "{{function_name}}", - "range": true, + "instant": true, + "range": false, "refId": "A" } ], @@ -2090,9 +2091,10 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "(sum by (function_name) (rate(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__rate_interval])) / sum by (function_name) (rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__rate_interval])) > 0) or (sum by (function_name) (function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}) / sum by (function_name) (function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}))", + "expr": "sum by (function_name) (function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}) / sum by (function_name) (function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"})", "legendFormat": "{{function_name}}", - "range": true, + "instant": true, + "range": false, "refId": "A" } ], From c7d20d6bea3088f0dff12fe45caa7515cdd20fd0 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Tue, 11 Nov 2025 19:06:51 -0300 Subject: [PATCH 11/28] rework panels for engine api --- .../common_dashboards/ethrex_l1_perf.json | 362 ++++++------------ 1 file changed, 119 insertions(+), 243 deletions(-) diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json index a6283b58d22..859388563ae 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json @@ -1246,7 +1246,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "(\n sum by (function_name) (\n increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name!~\"Block execution|Execute Block|Trie update\"}[$__range])\n )\n)\nor\nlabel_replace(\n sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Block execution\"}[$__range]) )\n - sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=~\"Storage read|Account read|Account code read|Block hash read\"}[$__range]) ),\n \"function_name\",\"Block Execution (w/o reads)\",\"__name__\",\".*\"\n)\nor\nlabel_replace(\n sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Execute Block\"}[$__range]) )\n - sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Block execution\"}[$__range]) ),\n \"function_name\",\"Merkleization Diff\",\"__name__\",\".*\"\n)", + "expr": "(\n sum by (function_name) (\n increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!~\"rpc|engine\", function_name!~\"Block execution|Execute Block|Trie update\"}[$__range])\n )\n)\nor\nlabel_replace(\n sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!~\"rpc|engine\", function_name=\"Block execution\"}[$__range]) )\n - sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!~\"rpc|engine\", function_name=~\"Storage read|Account read|Account code read|Block hash read\"}[$__range]) ),\n \"function_name\",\"Block Execution (w/o reads)\",\"__name__\",\".*\"\n)\nor\nlabel_replace(\n sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!~\"rpc|engine\", function_name=\"Execute Block\"}[$__range]) )\n - sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!~\"rpc|engine\", function_name=\"Block execution\"}[$__range]) ),\n \"function_name\",\"Merkleization Diff\",\"__name__\",\".*\"\n)", "fullMetaSearch": false, "includeNullMetadata": false, "legendFormat": "__auto", @@ -1351,7 +1351,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "label_replace(\n (sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Trie update\"}[$__range]) )\n - sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Block execution\"}[$__range]) ))\n / sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Execute Block\"}[$__range]) ),\n \"function_name\",\"Execution vs Merkleization Diff %\",\"__name__\",\".*\"\n)", + "expr": "label_replace(\n (sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!~\"rpc|engine\", function_name=\"Trie update\"}[$__range]) )\n - sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!~\"rpc|engine\", function_name=\"Block execution\"}[$__range]) ))\n / sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!~\"rpc|engine\", function_name=\"Execute Block\"}[$__range]) ),\n \"function_name\",\"Execution vs Merkleization Diff %\",\"__name__\",\".*\"\n)", "fullMetaSearch": false, "includeNullMetadata": false, "legendFormat": "__auto", @@ -1365,7 +1365,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "label_replace(\n 1 - ( sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Trie update\"}[$__range]) )\n / sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Execute Block\"}[$__range]) )),\n \"function_name\",\"Merkleization vs Total diff\",\"__name__\",\".*\"\n)", + "expr": "label_replace(\n 1 - ( sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!~\"rpc|engine\", function_name=\"Trie update\"}[$__range]) )\n / sum( increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!~\"rpc|engine\", function_name=\"Execute Block\"}[$__range]) )),\n \"function_name\",\"Merkleization vs Total diff\",\"__name__\",\".*\"\n)", "hide": true, "instant": false, "legendFormat": "__auto", @@ -1506,7 +1506,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "(\n # Calculates the Execute Block first and removes zeroes generated by idelta to smooth the panel\n sum by (instance, function_name) (\n clamp_min(\n idelta(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Execute Block\"}[$__rate_interval]),\n 0\n )\n )\n) > 0\nor\n(\n# Calculates the rest of the instruments, except Execute Block, which was calculated before\n sum by (instance, function_name) (\n clamp_min(\n idelta(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name!~\"Execute Block|Block execution\"}[$__rate_interval]),\n 0\n )\n )\n)\nor\nlabel_replace(\n sum by (instance) (\n clamp_min(\n idelta(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=\"Block execution\"}[$__rate_interval]),\n 0\n )\n )\n - sum by (instance) (\n clamp_min(\n idelta(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!=\"rpc\", function_name=~\"Storage read|Account read|Account code read|Block hash read\"}[$__rate_interval]),\n 0\n )\n ),\n \"function_name\",\"Block Execution (w/o reads)\",\"instance\",\".*\"\n)", + "expr": "(\n # Calculates the Execute Block first and removes zeroes generated by idelta to smooth the panel\n sum by (instance, function_name) (\n clamp_min(\n idelta(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!~\"rpc|engine\", function_name=\"Execute Block\"}[$__rate_interval]),\n 0\n )\n )\n) > 0\nor\n(\n# Calculates the rest of the instruments, except Execute Block, which was calculated before\n sum by (instance, function_name) (\n clamp_min(\n idelta(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!~\"rpc|engine\", function_name!~\"Execute Block|Block execution\"}[$__rate_interval]),\n 0\n )\n )\n)\nor\nlabel_replace(\n sum by (instance) (\n clamp_min(\n idelta(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!~\"rpc|engine\", function_name=\"Block execution\"}[$__rate_interval]),\n 0\n )\n )\n - sum by (instance) (\n clamp_min(\n idelta(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace!~\"rpc|engine\", function_name=~\"Storage read|Account read|Account code read|Block hash read\"}[$__rate_interval]),\n 0\n )\n ),\n \"function_name\",\"Block Execution (w/o reads)\",\"instance\",\".*\"\n)", "hide": false, "instant": false, "legendFormat": "{{instance}} {{function_name}}", @@ -1603,50 +1603,78 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, "mappings": [], - "unit": "s" + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { "h": 14, - "w": 7, + "w": 8, "x": 0, "y": 28 }, "id": 113, "options": { - "displayLabels": [ - "percent" - ], "legend": { - "displayMode": "table", - "placement": "right", - "showLegend": true, - "values": [ - "value", - "percent" - ] - }, - "pieType": "pie", - "reduceOptions": { "calcs": [ - "sum" + "mean", + "max" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom", + "showLegend": true }, - "sort": "desc", "tooltip": { "hideZeros": false, - "mode": "multi", - "sort": "desc" + "mode": "single", + "sort": "none" } }, "pluginVersion": "12.2.1", @@ -1659,37 +1687,27 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (function_name) (function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}) / sum by (function_name) (function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"})", + "expr": "sum by (function_name) (\n rate(function_duration_seconds_count{\n job=\"$job\",\n instance=~\"$instance(:\\\\d+)?$\",\n namespace=\"engine\"\n }[5m])\n)", + "instant": false, "legendFormat": "{{function_name}}", - "instant": true, - "range": false, + "range": true, "refId": "A" } ], - "title": "Engine API Average Time per Method", - "type": "piechart" + "title": "Engine Request Rate by Method", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "description": "", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, - "custom": { - "align": "left", - "cellOptions": { - "type": "auto" - }, - "filterable": true, - "footer": { - "reducers": [] - }, - "inspect": false - }, "mappings": [], "thresholds": { "mode": "absolute", @@ -1698,10 +1716,6 @@ "color": "green", "value": 0 }, - { - "color": "yellow", - "value": 1 - }, { "color": "red", "value": 4 @@ -1710,159 +1724,39 @@ }, "unit": "s" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Method" - }, - "properties": [ - { - "id": "custom.width", - "value": 350 - } - ] - } - ] - }, - "gridPos": { - "h": 14, - "w": 5, - "x": 7, - "y": 28 - }, - "id": 114, - "options": { - "cellHeight": "sm", - "showHeader": true, - "sortBy": [ - { - "desc": true, - "displayName": "Avg Duration" - } - ] - }, - "pluginVersion": "12.2.1", - "repeat": "instance", - "repeatDirection": "v", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "topk(15, rate(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[5m]) / rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[5m]))", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "Top 15 Slowest Engine API Methods (Avg Duration)", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "instance": true, - "job": true, - "namespace": true - }, - "indexByName": {}, - "renameByName": { - "Value": "Avg Duration", - "function_name": "Method" - } - } - } - ], - "type": "table" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "Requests/sec", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "showValues": false, - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 0 - } - ] - }, - "unit": "reqps" - }, "overrides": [] }, "gridPos": { "h": 14, - "w": 6, - "x": 12, + "w": 4, + "x": 8, "y": 28 }, - "id": 111, + "id": 115, + "interval": "10ss", "options": { + "displayMode": "gradient", "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { "calcs": [ - "mean", - "max" + "lastNotNull" ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true, - "sortBy": "Mean", - "sortDesc": true + "fields": "", + "values": false }, - "tooltip": { - "hideZeros": false, - "mode": "multi", - "sort": "desc" - } + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" }, "pluginVersion": "12.2.1", "repeat": "instance", @@ -1874,21 +1768,24 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (function_name) (rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[5m]))", - "legendFormat": "{{function_name}}", + "exemplar": false, + "expr": "sum by (function_name) (increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[$__range])) \n/ sum by (function_name) (increase(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[$__range]))", + "instant": false, + "interval": "", + "legendFormat": "__auto", "range": true, "refId": "A" } ], - "title": "Engine API Request Rate by Method", - "type": "timeseries" + "title": "Engine API AVG Latency (All Methods)", + "type": "bargauge" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "description": "Engine API latency percentiles - critical for validators. p99 should be well under 4s for block building.", + "description": "", "fieldConfig": { "defaults": { "color": { @@ -1912,20 +1809,23 @@ }, "insertNulls": false, "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, - "showPoints": "never", + "showPoints": "always", "showValues": false, - "spanNulls": false, + "spanNulls": true, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { - "mode": "line" + "mode": "off" } }, "mappings": [], @@ -1935,10 +1835,6 @@ { "color": "green", "value": 0 - }, - { - "color": "red", - "value": 4 } ] }, @@ -1948,18 +1844,19 @@ }, "gridPos": { "h": 14, - "w": 6, - "x": 18, + "w": 12, + "x": 12, "y": 28 }, "id": 112, + "interval": "10ss", "options": { "legend": { "calcs": [ "mean", "max" ], - "displayMode": "table", + "displayMode": "list", "placement": "bottom", "showLegend": true }, @@ -1979,39 +1876,20 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.50, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[5m])))", - "legendFormat": "p50", + "exemplar": false, + "expr": "sum by (function_name) (increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[18s])) \n/ sum by (function_name) (increase(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[18s]))", + "instant": false, + "interval": "", + "legendFormat": "__auto", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[5m])))", - "legendFormat": "p95", - "range": true, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"engine\"}[5m])))", - "legendFormat": "p99", - "range": true, - "refId": "C" } ], - "title": "Engine API Latency Percentiles (All Methods)", + "title": "Engine API Latency (All Methods)", "type": "timeseries" } ], - "title": "Engine API Performance", + "title": "Engine API", "type": "row" }, { @@ -2050,13 +1928,11 @@ "h": 14, "w": 7, "x": 0, - "y": 28 + "y": 29 }, "id": 103, "options": { - "displayLabels": [ - "percent" - ], + "displayLabels": [], "legend": { "displayMode": "table", "placement": "right", @@ -2092,8 +1968,8 @@ }, "editorMode": "code", "expr": "sum by (function_name) (function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}) / sum by (function_name) (function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"})", - "legendFormat": "{{function_name}}", "instant": true, + "legendFormat": "{{function_name}}", "range": false, "refId": "A" } @@ -2161,7 +2037,7 @@ "h": 14, "w": 5, "x": 7, - "y": 28 + "y": 29 }, "id": 104, "options": { @@ -2184,7 +2060,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "topk(15, rate(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]) / rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]))", + "expr": "rate(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]) / rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m])", "format": "table", "instant": true, "legendFormat": "__auto", @@ -2192,7 +2068,7 @@ "refId": "A" } ], - "title": "Top 15 Slowest RPC Methods (Avg Duration)", + "title": "Slowest RPC Methods (Avg Duration)", "transformations": [ { "id": "organize", @@ -2275,7 +2151,7 @@ "h": 14, "w": 6, "x": 12, - "y": 28 + "y": 29 }, "id": 101, "options": { @@ -2306,7 +2182,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (function_name) (rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]))", + "expr": "sum by (function_name) (rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__range]))", "legendFormat": "{{function_name}}", "range": true, "refId": "A" @@ -2377,7 +2253,7 @@ "h": 14, "w": 6, "x": 18, - "y": 28 + "y": 29 }, "id": 102, "options": { @@ -3428,7 +3304,7 @@ "h": 8, "w": 6, "x": 0, - "y": 1150 + "y": 1191 }, "id": 56, "options": { @@ -3527,7 +3403,7 @@ "h": 8, "w": 9, "x": 6, - "y": 1150 + "y": 1191 }, "id": 57, "options": { @@ -3627,7 +3503,7 @@ "h": 8, "w": 9, "x": 15, - "y": 1150 + "y": 1191 }, "id": 58, "options": { @@ -3749,5 +3625,5 @@ "timezone": "utc", "title": "Ethrex L1 - Perf Dashboard", "uid": "beoru4vp59yiof", - "version": 36 + "version": 37 } From fe929333306c802b10b8a286b918796dfb61ffa5 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Tue, 11 Nov 2025 19:19:31 -0300 Subject: [PATCH 12/28] reworked the RPC api panels --- .../common_dashboards/ethrex_l1_perf.json | 69 +++++++------------ 1 file changed, 23 insertions(+), 46 deletions(-) diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json index 859388563ae..4f49d7fdf6f 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json @@ -1656,9 +1656,9 @@ }, "gridPos": { "h": 14, - "w": 8, + "w": 7, "x": 0, - "y": 28 + "y": 54 }, "id": 113, "options": { @@ -1728,9 +1728,9 @@ }, "gridPos": { "h": 14, - "w": 4, - "x": 8, - "y": 28 + "w": 5, + "x": 7, + "y": 54 }, "id": 115, "interval": "10ss", @@ -1777,7 +1777,7 @@ "refId": "A" } ], - "title": "Engine API AVG Latency (All Methods)", + "title": "Engine Latency by Methods (AVG Duration)", "type": "bargauge" }, { @@ -1846,7 +1846,7 @@ "h": 14, "w": 12, "x": 12, - "y": 28 + "y": 54 }, "id": 112, "interval": "10ss", @@ -1885,7 +1885,7 @@ "refId": "A" } ], - "title": "Engine API Latency (All Methods)", + "title": "Engine Latency by Method", "type": "timeseries" } ], @@ -1928,7 +1928,7 @@ "h": 14, "w": 7, "x": 0, - "y": 29 + "y": 55 }, "id": 103, "options": { @@ -1938,7 +1938,6 @@ "placement": "right", "showLegend": true, "values": [ - "value", "percent" ] }, @@ -1967,7 +1966,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (function_name) (function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}) / sum by (function_name) (function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"})", + "expr": "sum by (function_name) (increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__range]))", "instant": true, "legendFormat": "{{function_name}}", "range": false, @@ -2037,7 +2036,7 @@ "h": 14, "w": 5, "x": 7, - "y": 29 + "y": 55 }, "id": 104, "options": { @@ -2151,7 +2150,7 @@ "h": 14, "w": 6, "x": 12, - "y": 29 + "y": 55 }, "id": 101, "options": { @@ -2224,9 +2223,9 @@ "scaleDistribution": { "type": "linear" }, - "showPoints": "never", + "showPoints": "auto", "showValues": false, - "spanNulls": false, + "spanNulls": true, "stacking": { "group": "A", "mode": "none" @@ -2253,7 +2252,7 @@ "h": 14, "w": 6, "x": 18, - "y": 29 + "y": 55 }, "id": 102, "options": { @@ -2282,39 +2281,17 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "histogram_quantile(0.50, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m])))", - "legendFormat": "p50", + "expr": "sum by (function_name) (increase(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[18s])) \n/ sum by (function_name) (increase(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[18s]))", + "legendFormat": "{{ function_name }}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m])))", - "legendFormat": "p95", - "range": true, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by (le) (rate(function_duration_seconds_bucket{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m])))", - "legendFormat": "p99", - "range": true, - "refId": "C" } ], - "title": "RPC Latency Percentiles (All Methods)", + "title": "RPC Latency by Methods", "type": "timeseries" } ], - "title": "RPC Performance", + "title": "RPC API", "type": "row" }, { @@ -3304,7 +3281,7 @@ "h": 8, "w": 6, "x": 0, - "y": 1191 + "y": 1327 }, "id": 56, "options": { @@ -3403,7 +3380,7 @@ "h": 8, "w": 9, "x": 6, - "y": 1191 + "y": 1327 }, "id": 57, "options": { @@ -3503,7 +3480,7 @@ "h": 8, "w": 9, "x": 15, - "y": 1191 + "y": 1327 }, "id": 58, "options": { @@ -3625,5 +3602,5 @@ "timezone": "utc", "title": "Ethrex L1 - Perf Dashboard", "uid": "beoru4vp59yiof", - "version": 37 + "version": 10 } From bc00add0444149530ed00c93bed054119c96e39b Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Tue, 11 Nov 2025 19:44:55 -0300 Subject: [PATCH 13/28] Added instances to the rpc and engine panels --- .../common_dashboards/ethrex_l1_perf.json | 64 +++++++++++++------ 1 file changed, 45 insertions(+), 19 deletions(-) diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json index 4f49d7fdf6f..64943baf90e 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json @@ -1658,7 +1658,7 @@ "h": 14, "w": 7, "x": 0, - "y": 54 + "y": 28 }, "id": 113, "options": { @@ -1694,7 +1694,7 @@ "refId": "A" } ], - "title": "Engine Request Rate by Method", + "title": "Engine Request Rate by Method - $instance", "type": "timeseries" }, { @@ -1730,7 +1730,7 @@ "h": 14, "w": 5, "x": 7, - "y": 54 + "y": 28 }, "id": 115, "interval": "10ss", @@ -1777,7 +1777,7 @@ "refId": "A" } ], - "title": "Engine Latency by Methods (AVG Duration)", + "title": "Engine Latency by Methods (AVG Duration) - $instance", "type": "bargauge" }, { @@ -1846,7 +1846,7 @@ "h": 14, "w": 12, "x": 12, - "y": 54 + "y": 28 }, "id": 112, "interval": "10ss", @@ -1885,7 +1885,7 @@ "refId": "A" } ], - "title": "Engine Latency by Method", + "title": "Engine Latency by Method - $instance", "type": "timeseries" } ], @@ -1928,7 +1928,7 @@ "h": 14, "w": 7, "x": 0, - "y": 55 + "y": 29 }, "id": 103, "options": { @@ -1973,7 +1973,7 @@ "refId": "A" } ], - "title": "RPC Average Time per Method", + "title": "RPC Average Time per Method - $instance", "type": "piechart" }, { @@ -2036,7 +2036,7 @@ "h": 14, "w": 5, "x": 7, - "y": 55 + "y": 29 }, "id": 104, "options": { @@ -2067,7 +2067,7 @@ "refId": "A" } ], - "title": "Slowest RPC Methods (Avg Duration)", + "title": "Slowest RPC Methods (Avg Duration) - $instance", "transformations": [ { "id": "organize", @@ -2144,13 +2144,39 @@ }, "unit": "reqps" }, - "overrides": [] + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "eth_gasPrice", + "eth_blockNumber" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": true, + "viz": true + } + } + ] + } + ] }, "gridPos": { "h": 14, "w": 6, "x": 12, - "y": 55 + "y": 29 }, "id": 101, "options": { @@ -2187,7 +2213,7 @@ "refId": "A" } ], - "title": "RPC Request Rate by Method", + "title": "RPC Request Rate by Method - $instance", "type": "timeseries" }, { @@ -2252,7 +2278,7 @@ "h": 14, "w": 6, "x": 18, - "y": 55 + "y": 29 }, "id": 102, "options": { @@ -2287,7 +2313,7 @@ "refId": "A" } ], - "title": "RPC Latency by Methods", + "title": "RPC Latency by Methods - $instance", "type": "timeseries" } ], @@ -3281,7 +3307,7 @@ "h": 8, "w": 6, "x": 0, - "y": 1327 + "y": 1424 }, "id": 56, "options": { @@ -3380,7 +3406,7 @@ "h": 8, "w": 9, "x": 6, - "y": 1327 + "y": 1424 }, "id": 57, "options": { @@ -3480,7 +3506,7 @@ "h": 8, "w": 9, "x": 15, - "y": 1327 + "y": 1424 }, "id": 58, "options": { @@ -3602,5 +3628,5 @@ "timezone": "utc", "title": "Ethrex L1 - Perf Dashboard", "uid": "beoru4vp59yiof", - "version": 10 + "version": 39 } From 86fccd84adbb4d009500a0b1d3344a1d38a481d6 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Tue, 11 Nov 2025 19:46:49 -0300 Subject: [PATCH 14/28] remove unintended overrides --- .../common_dashboards/ethrex_l1_perf.json | 34 +++---------------- 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json index 64943baf90e..52a9be94b3a 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json @@ -2144,33 +2144,7 @@ }, "unit": "reqps" }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "eth_gasPrice", - "eth_blockNumber" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": true, - "viz": true - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 14, @@ -3307,7 +3281,7 @@ "h": 8, "w": 6, "x": 0, - "y": 1424 + "y": 1438 }, "id": 56, "options": { @@ -3406,7 +3380,7 @@ "h": 8, "w": 9, "x": 6, - "y": 1424 + "y": 1438 }, "id": 57, "options": { @@ -3506,7 +3480,7 @@ "h": 8, "w": 9, "x": 15, - "y": 1424 + "y": 1438 }, "id": 58, "options": { From 5968fd37d8ad4921eadf2fe6aeb88a17f515bbee Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Wed, 12 Nov 2025 18:22:39 -0300 Subject: [PATCH 15/28] remove intemediary doc --- RPC_INSTRUMENTATION_APPLIED.md | 94 ---------------------------------- 1 file changed, 94 deletions(-) delete mode 100644 RPC_INSTRUMENTATION_APPLIED.md diff --git a/RPC_INSTRUMENTATION_APPLIED.md b/RPC_INSTRUMENTATION_APPLIED.md deleted file mode 100644 index 269653f0ac9..00000000000 --- a/RPC_INSTRUMENTATION_APPLIED.md +++ /dev/null @@ -1,94 +0,0 @@ -# RPC Instrumentation - Middleware Approach with Namespace Separation - -## What Was Done - -Added per-method profiling for RPC and Engine API calls using a middleware pattern with explicit namespace separation. - -## Changes - -### 1. `crates/blockchain/metrics/profiling.rs` -- Added `namespace` label to histogram: `&["namespace", "function_name"]` -- Uses wrapper structs `MethodName` and `Namespace` to disambiguate span extensions -- Extracts both `namespace` and `method` fields from spans -- Falls back to module path-based detection if namespace not explicitly set -- Supports explicit namespace override via span fields - -### 2. `crates/networking/rpc/rpc.rs` -- Updated middleware function to accept namespace parameter: -```rust -async fn instrumented_call( - namespace: &str, - method: &str, - req: &RpcRequest, - context: RpcApiContext -) -> Result -{ - let span = tracing::trace_span!("rpc_call", namespace = %namespace, method = %method); - let _enter = span.enter(); - T::call(req, context).await -} -``` -- Applied to **60+ RPC methods** with namespace separation: - - **35 eth_\* methods** → `namespace="rpc"` - - **7 debug_\* methods** → `namespace="rpc"` - - **18 engine_\* methods** → `namespace="engine"` - -### 3. `metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json` -- Added **Engine API Performance** row (id: 110) with 4 panels: - - Engine API Time Breakdown (piechart) - - Top 15 Slowest Engine API Methods (table with 4s threshold) - - Engine API Request Rate by Method (timeseries) - - Engine API Latency Percentiles (timeseries with 4s warning line) -- Updated existing **RPC Performance** row (id: 100) with 4 panels: - - RPC Time Breakdown (piechart) - - Top 15 Slowest RPC Methods (table) - - RPC Request Rate by Method (timeseries) - - RPC Latency Percentiles (timeseries) -- Updated **Block Execution Breakdown** panels to filter out RPC/Engine metrics using `namespace!="rpc"` and `namespace!="engine"` - -## Metrics Generated - -```promql -# RPC API metrics (eth_*, debug_*) -function_duration_seconds{namespace="rpc", function_name="eth_blockNumber"} -function_duration_seconds{namespace="rpc", function_name="debug_traceTransaction"} - -# Engine API metrics (engine_*) -function_duration_seconds{namespace="engine", function_name="engine_newPayloadV4"} -function_duration_seconds{namespace="engine", function_name="engine_forkchoiceUpdatedV3"} - -# Block execution metrics (unchanged) -function_duration_seconds{namespace="block_processing", function_name="Execute Block"} -``` - -## Namespace Separation Rationale - -**Engine API** is separated from general **RPC API** because: -- Engine API is **critical for validators** - directly interfaces with consensus clients -- **Payload build time** must be < 4s for block proposals (beacon chain slot timing) -- `newPayload`, `forkchoiceUpdated`, and `getPayload` have strict performance requirements -- Different SLA requirements: Engine API failures impact network consensus, RPC API failures impact user experience -- Enables targeted monitoring and alerting for validator operations - -## Grafana Queries - -```promql -# RPC only -rate(function_duration_seconds_sum{namespace="rpc"}[5m]) - -# RPC latency percentiles -histogram_quantile(0.99, rate(function_duration_seconds_bucket{namespace="rpc"}[5m])) - -# Compare namespaces -sum by (namespace) (rate(function_duration_seconds_sum[5m])) -``` - -## Testing - -```bash -# Start with metrics -cargo run --release -- --metrics-enabled - -# Check metrics -curl http://localhost:6060/metrics | grep 'namespace="rpc"' -``` From f75a0ac7e3c1472e87835f3a78b79445e0593f79 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Wed, 12 Nov 2025 19:08:34 -0300 Subject: [PATCH 16/28] initial rework of the instrumentation to avoid complex observers in profiling --- Cargo.lock | 1 + Cargo.toml | 1 + crates/blockchain/metrics/profiling.rs | 100 +++----- crates/networking/rpc/Cargo.toml | 1 + crates/networking/rpc/rpc.rs | 325 ++++++++++++++++++++----- 5 files changed, 301 insertions(+), 127 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 19e43387afa..f3ca6d0e281 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3697,6 +3697,7 @@ dependencies = [ "ethrex-blockchain", "ethrex-common", "ethrex-l2-common", + "ethrex-metrics", "ethrex-p2p", "ethrex-rlp", "ethrex-storage", diff --git a/Cargo.toml b/Cargo.toml index 793f5acf506..a681418044e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,6 +60,7 @@ ethrex-levm = { path = "./crates/vm/levm" } ethrex-trie = { path = "./crates/common/trie" } ethrex-rlp = { path = "./crates/common/rlp" } ethrex-crypto = { path = "./crates/common/crypto" } +ethrex-metrics = { path = "./crates/blockchain/metrics" } ethrex-l2 = { path = "./crates/l2" } ethrex-l2-common = { path = "./crates/l2/common" } ethrex-sdk = { path = "./crates/l2/sdk" } diff --git a/crates/blockchain/metrics/profiling.rs b/crates/blockchain/metrics/profiling.rs index a28a8025693..bfbad32a6c3 100644 --- a/crates/blockchain/metrics/profiling.rs +++ b/crates/blockchain/metrics/profiling.rs @@ -1,6 +1,6 @@ use prometheus::{Encoder, HistogramTimer, HistogramVec, TextEncoder, register_histogram_vec}; -use std::sync::LazyLock; -use tracing::{Subscriber, span::{Attributes, Id}, field::{Field, Visit}}; +use std::{future::Future, sync::LazyLock}; +use tracing::{Subscriber, span::Id}; use tracing_subscriber::{Layer, layer::Context, registry::LookupSpan}; use crate::MetricsError; @@ -25,91 +25,35 @@ pub struct FunctionProfilingLayer; /// Wrapper around [`HistogramTimer`] to avoid conflicts with other layers struct ProfileTimer(HistogramTimer); -/// Wrapper to store method name in span extensions -struct MethodName(String); - -/// Wrapper to store namespace in span extensions -struct Namespace(String); - -/// Visitor to extract 'method' and 'namespace' fields from RPC spans for more granular profiling -#[derive(Default)] -struct SpanFieldVisitor { - method: Option, - namespace: Option, -} - -impl Visit for SpanFieldVisitor { - fn record_str(&mut self, field: &Field, value: &str) { - match field.name() { - "method" => self.method = Some(value.to_string()), - "namespace" => self.namespace = Some(value.to_string()), - _ => {} - } - } - - fn record_debug(&mut self, field: &Field, value: &dyn std::fmt::Debug) { - let value_str = format!("{:?}", value).trim_matches('"').to_string(); - match field.name() { - "method" => self.method = Some(value_str), - "namespace" => self.namespace = Some(value_str), - _ => {} - } - } -} - impl Layer for FunctionProfilingLayer where S: Subscriber + for<'a> LookupSpan<'a>, { - fn on_new_span(&self, attrs: &Attributes<'_>, id: &Id, ctx: Context<'_, S>) { - // Extract 'method' and 'namespace' fields if present (used by RPC instrumentation) - let mut visitor = SpanFieldVisitor::default(); - attrs.record(&mut visitor); - - if let Some(span) = ctx.span(id) { - let mut extensions = span.extensions_mut(); - if let Some(method_name) = visitor.method { - extensions.insert(MethodName(method_name)); - } - if let Some(namespace) = visitor.namespace { - extensions.insert(Namespace(namespace)); - } - } - } - fn on_enter(&self, id: &Id, ctx: Context<'_, S>) { if let Some(span) = ctx.span(id) && span.metadata().target().starts_with("ethrex") { let target = span.metadata().target(); - let extensions = span.extensions(); - - // First, check if namespace was explicitly set in span (from RPC middleware) - // Otherwise, determine namespace based on the module target - let namespace = if let Some(ns) = extensions.get::() { - ns.0.clone() - } else if target.contains("::rpc") { - "rpc".to_string() - } else if target.contains("blockchain") { - "block_processing".to_string() + + // Skip RPC modules; RPC timing is recorded explicitly at the call sites. + if target.contains("::rpc") { + return; + } + + let namespace = if target.contains("blockchain") { + "block_processing" } else if target.contains("storage") { - "storage".to_string() + "storage" } else if target.contains("networking") { - "networking".to_string() + "networking" } else { - "other".to_string() + "other" }; - // Check if we have a stored method name (from RPC middleware) - // Otherwise fall back to the span name - let function_name = extensions - .get::() - .map(|m| m.0.clone()) - .unwrap_or_else(|| span.metadata().name().to_string()); - drop(extensions); + let function_name = span.metadata().name(); let timer = METRICS_BLOCK_PROCESSING_PROFILE - .with_label_values(&[&namespace, &function_name]) + .with_label_values(&[namespace, function_name]) .start_timer(); // PERF: `extensions_mut` uses a Mutex internally (per span) span.extensions_mut().insert(ProfileTimer(timer)); @@ -127,6 +71,20 @@ where } } +/// Record the duration of an async operation under the shared function profiling histogram. +pub async fn record_async_duration(namespace: &str, function_name: &str, future: Fut) -> T +where + Fut: Future, +{ + let timer = METRICS_BLOCK_PROCESSING_PROFILE + .with_label_values(&[namespace, function_name]) + .start_timer(); + + let output = future.await; + timer.observe_duration(); + output +} + pub fn gather_profiling_metrics() -> Result { let encoder = TextEncoder::new(); let metric_families = prometheus::gather(); diff --git a/crates/networking/rpc/Cargo.toml b/crates/networking/rpc/Cargo.toml index 1639f516d10..98051fff58e 100644 --- a/crates/networking/rpc/Cargo.toml +++ b/crates/networking/rpc/Cargo.toml @@ -20,6 +20,7 @@ ethrex-common.workspace = true ethrex-storage.workspace = true ethrex-vm.workspace = true ethrex-blockchain.workspace = true +ethrex-metrics.workspace = true ethrex-p2p = { workspace = true, features = ["test-utils"] } ethrex-rlp.workspace = true ethrex-trie.workspace = true diff --git a/crates/networking/rpc/rpc.rs b/crates/networking/rpc/rpc.rs index 05fe87771ae..c511abcc19c 100644 --- a/crates/networking/rpc/rpc.rs +++ b/crates/networking/rpc/rpc.rs @@ -55,6 +55,7 @@ use bytes::Bytes; use ethrex_blockchain::Blockchain; use ethrex_blockchain::error::ChainError; use ethrex_common::types::Block; +use ethrex_metrics::profiling::record_async_duration; use ethrex_p2p::peer_handler::PeerHandler; use ethrex_p2p::sync_manager::SyncManager; use ethrex_p2p::types::Node; @@ -197,8 +198,7 @@ pub trait RpcHandler: Sized { async fn handle(&self, context: RpcApiContext) -> Result; } -/// Middleware wrapper that instruments RPC method calls with tracing spans for profiling. -/// The profiling layer will use the provided namespace and method name for metrics labels. +/// Middleware wrapper that records RPC method latency directly in the shared profiling histogram. #[inline] async fn instrumented_call( namespace: &str, @@ -206,14 +206,7 @@ async fn instrumented_call( req: &RpcRequest, context: RpcApiContext, ) -> Result { - // Create a span with namespace and method name - // The profiling layer will extract these fields and use them for metrics: - // namespace = "rpc" | "engine" - // function_name = method name (e.g., "eth_blockNumber", "engine_newPayloadV4") - let span = tracing::trace_span!("rpc_call", namespace = %namespace, method = %method); - let _enter = span.enter(); - - T::call(req, context).await + record_async_duration(namespace, method, T::call(req, context)).await } pub const FILTER_DURATION: Duration = { @@ -477,33 +470,122 @@ pub async fn map_eth_requests(req: &RpcRequest, context: RpcApiContext) -> Resul match req.method.as_str() { "eth_chainId" => instrumented_call::("rpc", "eth_chainId", req, context).await, "eth_syncing" => instrumented_call::("rpc", "eth_syncing", req, context).await, - "eth_getBlockByNumber" => instrumented_call::("rpc", "eth_getBlockByNumber", req, context).await, - "eth_getBlockByHash" => instrumented_call::("rpc", "eth_getBlockByHash", req, context).await, - "eth_getBalance" => instrumented_call::("rpc", "eth_getBalance", req, context).await, - "eth_getCode" => instrumented_call::("rpc", "eth_getCode", req, context).await, - "eth_getStorageAt" => instrumented_call::("rpc", "eth_getStorageAt", req, context).await, + "eth_getBlockByNumber" => { + instrumented_call::( + "rpc", + "eth_getBlockByNumber", + req, + context, + ) + .await + } + "eth_getBlockByHash" => { + instrumented_call::("rpc", "eth_getBlockByHash", req, context) + .await + } + "eth_getBalance" => { + instrumented_call::("rpc", "eth_getBalance", req, context).await + } + "eth_getCode" => { + instrumented_call::("rpc", "eth_getCode", req, context).await + } + "eth_getStorageAt" => { + instrumented_call::("rpc", "eth_getStorageAt", req, context).await + } "eth_getBlockTransactionCountByNumber" => { - instrumented_call::("rpc", "eth_getBlockTransactionCountByNumber", req, context).await + instrumented_call::( + "rpc", + "eth_getBlockTransactionCountByNumber", + req, + context, + ) + .await } "eth_getBlockTransactionCountByHash" => { - instrumented_call::("rpc", "eth_getBlockTransactionCountByHash", req, context).await + instrumented_call::( + "rpc", + "eth_getBlockTransactionCountByHash", + req, + context, + ) + .await } "eth_getTransactionByBlockNumberAndIndex" => { - instrumented_call::("rpc", "eth_getTransactionByBlockNumberAndIndex", req, context).await + instrumented_call::( + "rpc", + "eth_getTransactionByBlockNumberAndIndex", + req, + context, + ) + .await } "eth_getTransactionByBlockHashAndIndex" => { - instrumented_call::("rpc", "eth_getTransactionByBlockHashAndIndex", req, context).await + instrumented_call::( + "rpc", + "eth_getTransactionByBlockHashAndIndex", + req, + context, + ) + .await + } + "eth_getBlockReceipts" => { + instrumented_call::( + "rpc", + "eth_getBlockReceipts", + req, + context, + ) + .await + } + "eth_getTransactionByHash" => { + instrumented_call::( + "rpc", + "eth_getTransactionByHash", + req, + context, + ) + .await + } + "eth_getTransactionReceipt" => { + instrumented_call::( + "rpc", + "eth_getTransactionReceipt", + req, + context, + ) + .await + } + "eth_createAccessList" => { + instrumented_call::( + "rpc", + "eth_createAccessList", + req, + context, + ) + .await + } + "eth_blockNumber" => { + instrumented_call::("rpc", "eth_blockNumber", req, context).await } - "eth_getBlockReceipts" => instrumented_call::("rpc", "eth_getBlockReceipts", req, context).await, - "eth_getTransactionByHash" => instrumented_call::("rpc", "eth_getTransactionByHash", req, context).await, - "eth_getTransactionReceipt" => instrumented_call::("rpc", "eth_getTransactionReceipt", req, context).await, - "eth_createAccessList" => instrumented_call::("rpc", "eth_createAccessList", req, context).await, - "eth_blockNumber" => instrumented_call::("rpc", "eth_blockNumber", req, context).await, "eth_call" => instrumented_call::("rpc", "eth_call", req, context).await, - "eth_blobBaseFee" => instrumented_call::("rpc", "eth_blobBaseFee", req, context).await, - "eth_getTransactionCount" => instrumented_call::("rpc", "eth_getTransactionCount", req, context).await, - "eth_feeHistory" => instrumented_call::("rpc", "eth_feeHistory", req, context).await, - "eth_estimateGas" => instrumented_call::("rpc", "eth_estimateGas", req, context).await, + "eth_blobBaseFee" => { + instrumented_call::("rpc", "eth_blobBaseFee", req, context).await + } + "eth_getTransactionCount" => { + instrumented_call::( + "rpc", + "eth_getTransactionCount", + req, + context, + ) + .await + } + "eth_feeHistory" => { + instrumented_call::("rpc", "eth_feeHistory", req, context).await + } + "eth_estimateGas" => { + instrumented_call::("rpc", "eth_estimateGas", req, context).await + } "eth_getLogs" => instrumented_call::("rpc", "eth_getLogs", req, context).await, "eth_newFilter" => { NewFilterRequest::stateful_call(req, context.storage, context.active_filters).await @@ -514,11 +596,27 @@ pub async fn map_eth_requests(req: &RpcRequest, context: RpcApiContext) -> Resul "eth_getFilterChanges" => { FilterChangesRequest::stateful_call(req, context.storage, context.active_filters).await } - "eth_sendRawTransaction" => instrumented_call::("rpc", "eth_sendRawTransaction", req, context).await, - "eth_getProof" => instrumented_call::("rpc", "eth_getProof", req, context).await, + "eth_sendRawTransaction" => { + instrumented_call::( + "rpc", + "eth_sendRawTransaction", + req, + context, + ) + .await + } + "eth_getProof" => { + instrumented_call::("rpc", "eth_getProof", req, context).await + } "eth_gasPrice" => instrumented_call::("rpc", "eth_gasPrice", req, context).await, "eth_maxPriorityFeePerGas" => { - instrumented_call::("rpc", "eth_maxPriorityFeePerGas", req, context).await + instrumented_call::( + "rpc", + "eth_maxPriorityFeePerGas", + req, + context, + ) + .await } "eth_config" => instrumented_call::("rpc", "eth_config", req, context).await, unknown_eth_method => Err(RpcErr::MethodNotFound(unknown_eth_method.to_owned())), @@ -527,13 +625,47 @@ pub async fn map_eth_requests(req: &RpcRequest, context: RpcApiContext) -> Resul pub async fn map_debug_requests(req: &RpcRequest, context: RpcApiContext) -> Result { match req.method.as_str() { - "debug_getRawHeader" => instrumented_call::("rpc", "debug_getRawHeader", req, context).await, - "debug_getRawBlock" => instrumented_call::("rpc", "debug_getRawBlock", req, context).await, - "debug_getRawTransaction" => instrumented_call::("rpc", "debug_getRawTransaction", req, context).await, - "debug_getRawReceipts" => instrumented_call::("rpc", "debug_getRawReceipts", req, context).await, - "debug_executionWitness" => instrumented_call::("rpc", "debug_executionWitness", req, context).await, - "debug_traceTransaction" => instrumented_call::("rpc", "debug_traceTransaction", req, context).await, - "debug_traceBlockByNumber" => instrumented_call::("rpc", "debug_traceBlockByNumber", req, context).await, + "debug_getRawHeader" => { + instrumented_call::("rpc", "debug_getRawHeader", req, context) + .await + } + "debug_getRawBlock" => { + instrumented_call::("rpc", "debug_getRawBlock", req, context).await + } + "debug_getRawTransaction" => { + instrumented_call::("rpc", "debug_getRawTransaction", req, context) + .await + } + "debug_getRawReceipts" => { + instrumented_call::("rpc", "debug_getRawReceipts", req, context).await + } + "debug_executionWitness" => { + instrumented_call::( + "rpc", + "debug_executionWitness", + req, + context, + ) + .await + } + "debug_traceTransaction" => { + instrumented_call::( + "rpc", + "debug_traceTransaction", + req, + context, + ) + .await + } + "debug_traceBlockByNumber" => { + instrumented_call::( + "rpc", + "debug_traceBlockByNumber", + req, + context, + ) + .await + } unknown_debug_method => Err(RpcErr::MethodNotFound(unknown_debug_method.to_owned())), } } @@ -543,30 +675,111 @@ pub async fn map_engine_requests( context: RpcApiContext, ) -> Result { match req.method.as_str() { - "engine_exchangeCapabilities" => instrumented_call::("engine", "engine_exchangeCapabilities", req, context).await, - "engine_forkchoiceUpdatedV1" => instrumented_call::("engine", "engine_forkchoiceUpdatedV1", req, context).await, - "engine_forkchoiceUpdatedV2" => instrumented_call::("engine", "engine_forkchoiceUpdatedV2", req, context).await, - "engine_forkchoiceUpdatedV3" => instrumented_call::("engine", "engine_forkchoiceUpdatedV3", req, context).await, - "engine_newPayloadV4" => instrumented_call::("engine", "engine_newPayloadV4", req, context).await, - "engine_newPayloadV3" => instrumented_call::("engine", "engine_newPayloadV3", req, context).await, - "engine_newPayloadV2" => instrumented_call::("engine", "engine_newPayloadV2", req, context).await, - "engine_newPayloadV1" => instrumented_call::("engine", "engine_newPayloadV1", req, context).await, + "engine_exchangeCapabilities" => { + instrumented_call::( + "engine", + "engine_exchangeCapabilities", + req, + context, + ) + .await + } + "engine_forkchoiceUpdatedV1" => { + instrumented_call::( + "engine", + "engine_forkchoiceUpdatedV1", + req, + context, + ) + .await + } + "engine_forkchoiceUpdatedV2" => { + instrumented_call::( + "engine", + "engine_forkchoiceUpdatedV2", + req, + context, + ) + .await + } + "engine_forkchoiceUpdatedV3" => { + instrumented_call::( + "engine", + "engine_forkchoiceUpdatedV3", + req, + context, + ) + .await + } + "engine_newPayloadV4" => { + instrumented_call::("engine", "engine_newPayloadV4", req, context) + .await + } + "engine_newPayloadV3" => { + instrumented_call::("engine", "engine_newPayloadV3", req, context) + .await + } + "engine_newPayloadV2" => { + instrumented_call::("engine", "engine_newPayloadV2", req, context) + .await + } + "engine_newPayloadV1" => { + instrumented_call::("engine", "engine_newPayloadV1", req, context) + .await + } "engine_exchangeTransitionConfigurationV1" => { - instrumented_call::("engine", "engine_exchangeTransitionConfigurationV1", req, context).await + instrumented_call::( + "engine", + "engine_exchangeTransitionConfigurationV1", + req, + context, + ) + .await + } + "engine_getPayloadV5" => { + instrumented_call::("engine", "engine_getPayloadV5", req, context) + .await + } + "engine_getPayloadV4" => { + instrumented_call::("engine", "engine_getPayloadV4", req, context) + .await + } + "engine_getPayloadV3" => { + instrumented_call::("engine", "engine_getPayloadV3", req, context) + .await + } + "engine_getPayloadV2" => { + instrumented_call::("engine", "engine_getPayloadV2", req, context) + .await + } + "engine_getPayloadV1" => { + instrumented_call::("engine", "engine_getPayloadV1", req, context) + .await } - "engine_getPayloadV5" => instrumented_call::("engine", "engine_getPayloadV5", req, context).await, - "engine_getPayloadV4" => instrumented_call::("engine", "engine_getPayloadV4", req, context).await, - "engine_getPayloadV3" => instrumented_call::("engine", "engine_getPayloadV3", req, context).await, - "engine_getPayloadV2" => instrumented_call::("engine", "engine_getPayloadV2", req, context).await, - "engine_getPayloadV1" => instrumented_call::("engine", "engine_getPayloadV1", req, context).await, "engine_getPayloadBodiesByHashV1" => { - instrumented_call::("engine", "engine_getPayloadBodiesByHashV1", req, context).await + instrumented_call::( + "engine", + "engine_getPayloadBodiesByHashV1", + req, + context, + ) + .await } "engine_getPayloadBodiesByRangeV1" => { - instrumented_call::("engine", "engine_getPayloadBodiesByRangeV1", req, context).await + instrumented_call::( + "engine", + "engine_getPayloadBodiesByRangeV1", + req, + context, + ) + .await + } + "engine_getBlobsV1" => { + instrumented_call::("engine", "engine_getBlobsV1", req, context).await + } + "engine_getBlobsV2" => { + instrumented_call::("engine", "engine_getBlobsV2", req, context).await } - "engine_getBlobsV1" => instrumented_call::("engine", "engine_getBlobsV1", req, context).await, - "engine_getBlobsV2" => instrumented_call::("engine", "engine_getBlobsV2", req, context).await, unknown_engine_method => Err(RpcErr::MethodNotFound(unknown_engine_method.to_owned())), } } From 6c4a9f5c93e3093b92680bea5046683a256078d3 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Wed, 12 Nov 2025 19:29:57 -0300 Subject: [PATCH 17/28] go back to use just call to avoid most of rpc.rs changes made --- crates/networking/rpc/rpc.rs | 347 +++++++---------------------------- 1 file changed, 62 insertions(+), 285 deletions(-) diff --git a/crates/networking/rpc/rpc.rs b/crates/networking/rpc/rpc.rs index c511abcc19c..c7d2b5b9c42 100644 --- a/crates/networking/rpc/rpc.rs +++ b/crates/networking/rpc/rpc.rs @@ -192,23 +192,20 @@ pub trait RpcHandler: Sized { async fn call(req: &RpcRequest, context: RpcApiContext) -> Result { let request = Self::parse(&req.params)?; - request.handle(context).await + let namespace = match req.namespace() { + Ok(RpcNamespace::Engine) => "engine", + _ => "rpc", + }; + + record_async_duration(namespace, req.method.as_str(), async move { + request.handle(context).await + }) + .await } async fn handle(&self, context: RpcApiContext) -> Result; } -/// Middleware wrapper that records RPC method latency directly in the shared profiling histogram. -#[inline] -async fn instrumented_call( - namespace: &str, - method: &str, - req: &RpcRequest, - context: RpcApiContext, -) -> Result { - record_async_duration(namespace, method, T::call(req, context)).await -} - pub const FILTER_DURATION: Duration = { if cfg!(test) { Duration::from_secs(1) @@ -468,125 +465,36 @@ pub async fn map_authrpc_requests( pub async fn map_eth_requests(req: &RpcRequest, context: RpcApiContext) -> Result { match req.method.as_str() { - "eth_chainId" => instrumented_call::("rpc", "eth_chainId", req, context).await, - "eth_syncing" => instrumented_call::("rpc", "eth_syncing", req, context).await, - "eth_getBlockByNumber" => { - instrumented_call::( - "rpc", - "eth_getBlockByNumber", - req, - context, - ) - .await - } - "eth_getBlockByHash" => { - instrumented_call::("rpc", "eth_getBlockByHash", req, context) - .await - } - "eth_getBalance" => { - instrumented_call::("rpc", "eth_getBalance", req, context).await - } - "eth_getCode" => { - instrumented_call::("rpc", "eth_getCode", req, context).await - } - "eth_getStorageAt" => { - instrumented_call::("rpc", "eth_getStorageAt", req, context).await - } + "eth_chainId" => ChainId::call(req, context).await, + "eth_syncing" => Syncing::call(req, context).await, + "eth_getBlockByNumber" => GetBlockByNumberRequest::call(req, context).await, + "eth_getBlockByHash" => GetBlockByHashRequest::call(req, context).await, + "eth_getBalance" => GetBalanceRequest::call(req, context).await, + "eth_getCode" => GetCodeRequest::call(req, context).await, + "eth_getStorageAt" => GetStorageAtRequest::call(req, context).await, "eth_getBlockTransactionCountByNumber" => { - instrumented_call::( - "rpc", - "eth_getBlockTransactionCountByNumber", - req, - context, - ) - .await + GetBlockTransactionCountRequest::call(req, context).await } "eth_getBlockTransactionCountByHash" => { - instrumented_call::( - "rpc", - "eth_getBlockTransactionCountByHash", - req, - context, - ) - .await + GetBlockTransactionCountRequest::call(req, context).await } "eth_getTransactionByBlockNumberAndIndex" => { - instrumented_call::( - "rpc", - "eth_getTransactionByBlockNumberAndIndex", - req, - context, - ) - .await + GetTransactionByBlockNumberAndIndexRequest::call(req, context).await } "eth_getTransactionByBlockHashAndIndex" => { - instrumented_call::( - "rpc", - "eth_getTransactionByBlockHashAndIndex", - req, - context, - ) - .await - } - "eth_getBlockReceipts" => { - instrumented_call::( - "rpc", - "eth_getBlockReceipts", - req, - context, - ) - .await - } - "eth_getTransactionByHash" => { - instrumented_call::( - "rpc", - "eth_getTransactionByHash", - req, - context, - ) - .await - } - "eth_getTransactionReceipt" => { - instrumented_call::( - "rpc", - "eth_getTransactionReceipt", - req, - context, - ) - .await - } - "eth_createAccessList" => { - instrumented_call::( - "rpc", - "eth_createAccessList", - req, - context, - ) - .await - } - "eth_blockNumber" => { - instrumented_call::("rpc", "eth_blockNumber", req, context).await - } - "eth_call" => instrumented_call::("rpc", "eth_call", req, context).await, - "eth_blobBaseFee" => { - instrumented_call::("rpc", "eth_blobBaseFee", req, context).await - } - "eth_getTransactionCount" => { - instrumented_call::( - "rpc", - "eth_getTransactionCount", - req, - context, - ) - .await - } - "eth_feeHistory" => { - instrumented_call::("rpc", "eth_feeHistory", req, context).await - } - "eth_estimateGas" => { - instrumented_call::("rpc", "eth_estimateGas", req, context).await - } - "eth_getLogs" => instrumented_call::("rpc", "eth_getLogs", req, context).await, + GetTransactionByBlockHashAndIndexRequest::call(req, context).await + } + "eth_getBlockReceipts" => GetBlockReceiptsRequest::call(req, context).await, + "eth_getTransactionByHash" => GetTransactionByHashRequest::call(req, context).await, + "eth_getTransactionReceipt" => GetTransactionReceiptRequest::call(req, context).await, + "eth_createAccessList" => CreateAccessListRequest::call(req, context).await, + "eth_blockNumber" => BlockNumberRequest::call(req, context).await, + "eth_call" => CallRequest::call(req, context).await, + "eth_blobBaseFee" => GetBlobBaseFee::call(req, context).await, + "eth_getTransactionCount" => GetTransactionCountRequest::call(req, context).await, + "eth_feeHistory" => FeeHistoryRequest::call(req, context).await, + "eth_estimateGas" => EstimateGasRequest::call(req, context).await, + "eth_getLogs" => LogsFilter::call(req, context).await, "eth_newFilter" => { NewFilterRequest::stateful_call(req, context.storage, context.active_filters).await } @@ -596,76 +504,26 @@ pub async fn map_eth_requests(req: &RpcRequest, context: RpcApiContext) -> Resul "eth_getFilterChanges" => { FilterChangesRequest::stateful_call(req, context.storage, context.active_filters).await } - "eth_sendRawTransaction" => { - instrumented_call::( - "rpc", - "eth_sendRawTransaction", - req, - context, - ) - .await - } - "eth_getProof" => { - instrumented_call::("rpc", "eth_getProof", req, context).await - } - "eth_gasPrice" => instrumented_call::("rpc", "eth_gasPrice", req, context).await, + "eth_sendRawTransaction" => SendRawTransactionRequest::call(req, context).await, + "eth_getProof" => GetProofRequest::call(req, context).await, + "eth_gasPrice" => GasPrice::call(req, context).await, "eth_maxPriorityFeePerGas" => { - instrumented_call::( - "rpc", - "eth_maxPriorityFeePerGas", - req, - context, - ) - .await + eth::max_priority_fee::MaxPriorityFee::call(req, context).await } - "eth_config" => instrumented_call::("rpc", "eth_config", req, context).await, + "eth_config" => Config::call(req, context).await, unknown_eth_method => Err(RpcErr::MethodNotFound(unknown_eth_method.to_owned())), } } pub async fn map_debug_requests(req: &RpcRequest, context: RpcApiContext) -> Result { match req.method.as_str() { - "debug_getRawHeader" => { - instrumented_call::("rpc", "debug_getRawHeader", req, context) - .await - } - "debug_getRawBlock" => { - instrumented_call::("rpc", "debug_getRawBlock", req, context).await - } - "debug_getRawTransaction" => { - instrumented_call::("rpc", "debug_getRawTransaction", req, context) - .await - } - "debug_getRawReceipts" => { - instrumented_call::("rpc", "debug_getRawReceipts", req, context).await - } - "debug_executionWitness" => { - instrumented_call::( - "rpc", - "debug_executionWitness", - req, - context, - ) - .await - } - "debug_traceTransaction" => { - instrumented_call::( - "rpc", - "debug_traceTransaction", - req, - context, - ) - .await - } - "debug_traceBlockByNumber" => { - instrumented_call::( - "rpc", - "debug_traceBlockByNumber", - req, - context, - ) - .await - } + "debug_getRawHeader" => GetRawHeaderRequest::call(req, context).await, + "debug_getRawBlock" => GetRawBlockRequest::call(req, context).await, + "debug_getRawTransaction" => GetRawTransaction::call(req, context).await, + "debug_getRawReceipts" => GetRawReceipts::call(req, context).await, + "debug_executionWitness" => ExecutionWitnessRequest::call(req, context).await, + "debug_traceTransaction" => TraceTransactionRequest::call(req, context).await, + "debug_traceBlockByNumber" => TraceBlockByNumberRequest::call(req, context).await, unknown_debug_method => Err(RpcErr::MethodNotFound(unknown_debug_method.to_owned())), } } @@ -675,111 +533,30 @@ pub async fn map_engine_requests( context: RpcApiContext, ) -> Result { match req.method.as_str() { - "engine_exchangeCapabilities" => { - instrumented_call::( - "engine", - "engine_exchangeCapabilities", - req, - context, - ) - .await - } - "engine_forkchoiceUpdatedV1" => { - instrumented_call::( - "engine", - "engine_forkchoiceUpdatedV1", - req, - context, - ) - .await - } - "engine_forkchoiceUpdatedV2" => { - instrumented_call::( - "engine", - "engine_forkchoiceUpdatedV2", - req, - context, - ) - .await - } - "engine_forkchoiceUpdatedV3" => { - instrumented_call::( - "engine", - "engine_forkchoiceUpdatedV3", - req, - context, - ) - .await - } - "engine_newPayloadV4" => { - instrumented_call::("engine", "engine_newPayloadV4", req, context) - .await - } - "engine_newPayloadV3" => { - instrumented_call::("engine", "engine_newPayloadV3", req, context) - .await - } - "engine_newPayloadV2" => { - instrumented_call::("engine", "engine_newPayloadV2", req, context) - .await - } - "engine_newPayloadV1" => { - instrumented_call::("engine", "engine_newPayloadV1", req, context) - .await - } + "engine_exchangeCapabilities" => ExchangeCapabilitiesRequest::call(req, context).await, + "engine_forkchoiceUpdatedV1" => ForkChoiceUpdatedV1::call(req, context).await, + "engine_forkchoiceUpdatedV2" => ForkChoiceUpdatedV2::call(req, context).await, + "engine_forkchoiceUpdatedV3" => ForkChoiceUpdatedV3::call(req, context).await, + "engine_newPayloadV4" => NewPayloadV4Request::call(req, context).await, + "engine_newPayloadV3" => NewPayloadV3Request::call(req, context).await, + "engine_newPayloadV2" => NewPayloadV2Request::call(req, context).await, + "engine_newPayloadV1" => NewPayloadV1Request::call(req, context).await, "engine_exchangeTransitionConfigurationV1" => { - instrumented_call::( - "engine", - "engine_exchangeTransitionConfigurationV1", - req, - context, - ) - .await - } - "engine_getPayloadV5" => { - instrumented_call::("engine", "engine_getPayloadV5", req, context) - .await - } - "engine_getPayloadV4" => { - instrumented_call::("engine", "engine_getPayloadV4", req, context) - .await - } - "engine_getPayloadV3" => { - instrumented_call::("engine", "engine_getPayloadV3", req, context) - .await - } - "engine_getPayloadV2" => { - instrumented_call::("engine", "engine_getPayloadV2", req, context) - .await - } - "engine_getPayloadV1" => { - instrumented_call::("engine", "engine_getPayloadV1", req, context) - .await + ExchangeTransitionConfigV1Req::call(req, context).await } + "engine_getPayloadV5" => GetPayloadV5Request::call(req, context).await, + "engine_getPayloadV4" => GetPayloadV4Request::call(req, context).await, + "engine_getPayloadV3" => GetPayloadV3Request::call(req, context).await, + "engine_getPayloadV2" => GetPayloadV2Request::call(req, context).await, + "engine_getPayloadV1" => GetPayloadV1Request::call(req, context).await, "engine_getPayloadBodiesByHashV1" => { - instrumented_call::( - "engine", - "engine_getPayloadBodiesByHashV1", - req, - context, - ) - .await + GetPayloadBodiesByHashV1Request::call(req, context).await } "engine_getPayloadBodiesByRangeV1" => { - instrumented_call::( - "engine", - "engine_getPayloadBodiesByRangeV1", - req, - context, - ) - .await - } - "engine_getBlobsV1" => { - instrumented_call::("engine", "engine_getBlobsV1", req, context).await - } - "engine_getBlobsV2" => { - instrumented_call::("engine", "engine_getBlobsV2", req, context).await + GetPayloadBodiesByRangeV1Request::call(req, context).await } + "engine_getBlobsV1" => BlobsV1Request::call(req, context).await, + "engine_getBlobsV2" => BlobsV2Request::call(req, context).await, unknown_engine_method => Err(RpcErr::MethodNotFound(unknown_engine_method.to_owned())), } } From 024b3f7f1113ffcb8fcdea126963504deaffddbf Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Wed, 12 Nov 2025 20:09:34 -0300 Subject: [PATCH 18/28] Add namespace to profiling and explicitly set it up in the previous instrumentation --- crates/blockchain/blockchain.rs | 21 ++++++-- crates/blockchain/metrics/profiling.rs | 67 +++++++++++++++++++++----- crates/blockchain/vm.rs | 28 +++++++++-- crates/vm/backends/mod.rs | 7 ++- 4 files changed, 103 insertions(+), 20 deletions(-) diff --git a/crates/blockchain/blockchain.rs b/crates/blockchain/blockchain.rs index 9a8e182abbc..9a07c7fb4ae 100644 --- a/crates/blockchain/blockchain.rs +++ b/crates/blockchain/blockchain.rs @@ -180,7 +180,12 @@ impl Blockchain { } /// Executes a block withing a new vm instance and state - #[instrument(level = "trace", name = "Execute Block", skip_all)] + #[instrument( + level = "trace", + namespace = "block_execution", + name = "Execute Block", + skip_all + )] fn execute_block_pipeline( &self, block: &Block, @@ -280,7 +285,12 @@ impl Blockchain { )) } - #[instrument(level = "trace", name = "Trie update", skip_all)] + #[instrument( + level = "trace", + namespace = "block_execution", + name = "Trie update", + skip_all + )] fn handle_merkleization( &self, rx: Receiver>, @@ -772,7 +782,12 @@ impl Blockchain { }) } - #[instrument(level = "trace", name = "Block DB update", skip_all)] + #[instrument( + level = "trace", + namespace = "block_execution", + name = "Block DB update", + skip_all + )] pub fn store_block( &self, block: Block, diff --git a/crates/blockchain/metrics/profiling.rs b/crates/blockchain/metrics/profiling.rs index bfbad32a6c3..992b689dec2 100644 --- a/crates/blockchain/metrics/profiling.rs +++ b/crates/blockchain/metrics/profiling.rs @@ -1,6 +1,10 @@ use prometheus::{Encoder, HistogramTimer, HistogramVec, TextEncoder, register_histogram_vec}; -use std::{future::Future, sync::LazyLock}; -use tracing::{Subscriber, span::Id}; +use std::{borrow::Cow, future::Future, sync::LazyLock}; +use tracing::{ + Subscriber, + field::{Field, Visit}, + span::{Attributes, Id}, +}; use tracing_subscriber::{Layer, layer::Context, registry::LookupSpan}; use crate::MetricsError; @@ -25,10 +29,43 @@ pub struct FunctionProfilingLayer; /// Wrapper around [`HistogramTimer`] to avoid conflicts with other layers struct ProfileTimer(HistogramTimer); +/// Span extension storing the profiling namespace selected by instrumentation. +struct Namespace(Cow<'static, str>); + +#[derive(Default)] +struct NamespaceVisitor { + namespace: Option>, +} + +impl Visit for NamespaceVisitor { + fn record_str(&mut self, field: &Field, value: &str) { + if field.name() == "namespace" { + self.namespace = Some(Cow::Owned(value.to_owned())); + } + } + + fn record_debug(&mut self, field: &Field, value: &dyn std::fmt::Debug) { + if field.name() == "namespace" { + self.namespace = Some(Cow::Owned( + format!("{value:?}").trim_matches('"').to_owned(), + )); + } + } +} + impl Layer for FunctionProfilingLayer where S: Subscriber + for<'a> LookupSpan<'a>, { + fn on_new_span(&self, attrs: &Attributes<'_>, id: &Id, ctx: Context<'_, S>) { + let mut visitor = NamespaceVisitor::default(); + attrs.record(&mut visitor); + + if let (Some(span), Some(namespace)) = (ctx.span(id), visitor.namespace) { + span.extensions_mut().insert(Namespace(namespace)); + } + } + fn on_enter(&self, id: &Id, ctx: Context<'_, S>) { if let Some(span) = ctx.span(id) && span.metadata().target().starts_with("ethrex") @@ -40,20 +77,26 @@ where return; } - let namespace = if target.contains("blockchain") { - "block_processing" - } else if target.contains("storage") { - "storage" - } else if target.contains("networking") { - "networking" - } else { - "other" - }; + let namespace = span + .extensions() + .get::() + .map(|ns| ns.0.clone()) + .unwrap_or_else(|| { + if target.contains("blockchain") { + Cow::Borrowed("block_processing") + } else if target.contains("storage") { + Cow::Borrowed("storage") + } else if target.contains("networking") { + Cow::Borrowed("networking") + } else { + Cow::Borrowed("other") + } + }); let function_name = span.metadata().name(); let timer = METRICS_BLOCK_PROCESSING_PROFILE - .with_label_values(&[namespace, function_name]) + .with_label_values(&[namespace.as_ref(), function_name]) .start_timer(); // PERF: `extensions_mut` uses a Mutex internally (per span) span.extensions_mut().insert(ProfileTimer(timer)); diff --git a/crates/blockchain/vm.rs b/crates/blockchain/vm.rs index 7a5def34969..b7b0eda00b5 100644 --- a/crates/blockchain/vm.rs +++ b/crates/blockchain/vm.rs @@ -44,21 +44,36 @@ impl StoreVmDatabase { } impl VmDatabase for StoreVmDatabase { - #[instrument(level = "trace", name = "Account read", skip_all)] + #[instrument( + level = "trace", + namespace = "block_execution", + name = "Account read", + skip_all + )] fn get_account_state(&self, address: Address) -> Result, EvmError> { self.store .get_account_state_by_root(self.state_root, address) .map_err(|e| EvmError::DB(e.to_string())) } - #[instrument(level = "trace", name = "Storage read", skip_all)] + #[instrument( + level = "trace", + namespace = "block_execution", + name = "Storage read", + skip_all + )] fn get_storage_slot(&self, address: Address, key: H256) -> Result, EvmError> { self.store .get_storage_at_root(self.state_root, address, key) .map_err(|e| EvmError::DB(e.to_string())) } - #[instrument(level = "trace", name = "Block hash read", skip_all)] + #[instrument( + level = "trace", + namespace = "block_execution", + name = "Block hash read", + skip_all + )] fn get_block_hash(&self, block_number: u64) -> Result { // Check if we have it cached if let Some(block_hash) = self.block_hash_cache.get(&block_number) { @@ -103,7 +118,12 @@ impl VmDatabase for StoreVmDatabase { Ok(self.store.get_chain_config()) } - #[instrument(level = "trace", name = "Account code read", skip_all)] + #[instrument( + level = "trace", + namespace = "block_execution", + name = "Account code read", + skip_all + )] fn get_account_code(&self, code_hash: H256) -> Result { if code_hash == *EMPTY_KECCACK_HASH { return Ok(Code::default()); diff --git a/crates/vm/backends/mod.rs b/crates/vm/backends/mod.rs index 640be048178..52cdc2e97b1 100644 --- a/crates/vm/backends/mod.rs +++ b/crates/vm/backends/mod.rs @@ -77,7 +77,12 @@ impl Evm { LEVM::execute_block(block, &mut self.db, self.vm_type) } - #[instrument(level = "trace", name = "Block execution", skip_all)] + #[instrument( + level = "trace", + namespace = "block_execution", + name = "Block execution", + skip_all + )] pub fn execute_block_pipeline( &mut self, block: &Block, From c51eeeabe8c375d0b720778ebb46a034a60b9e13 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Wed, 12 Nov 2025 20:16:04 -0300 Subject: [PATCH 19/28] fixed an issue with the way namespace is send through #instrument --- crates/blockchain/blockchain.rs | 12 ++++++------ crates/blockchain/vm.rs | 16 ++++++++-------- crates/vm/backends/mod.rs | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/crates/blockchain/blockchain.rs b/crates/blockchain/blockchain.rs index 9a07c7fb4ae..639b1f9a68e 100644 --- a/crates/blockchain/blockchain.rs +++ b/crates/blockchain/blockchain.rs @@ -182,9 +182,9 @@ impl Blockchain { /// Executes a block withing a new vm instance and state #[instrument( level = "trace", - namespace = "block_execution", name = "Execute Block", - skip_all + skip_all, + fields(namespace = "block_execution") )] fn execute_block_pipeline( &self, @@ -287,9 +287,9 @@ impl Blockchain { #[instrument( level = "trace", - namespace = "block_execution", name = "Trie update", - skip_all + skip_all, + fields(namespace = "block_execution") )] fn handle_merkleization( &self, @@ -784,9 +784,9 @@ impl Blockchain { #[instrument( level = "trace", - namespace = "block_execution", name = "Block DB update", - skip_all + skip_all, + fields(namespace = "block_execution") )] pub fn store_block( &self, diff --git a/crates/blockchain/vm.rs b/crates/blockchain/vm.rs index b7b0eda00b5..a4a800ccdad 100644 --- a/crates/blockchain/vm.rs +++ b/crates/blockchain/vm.rs @@ -46,9 +46,9 @@ impl StoreVmDatabase { impl VmDatabase for StoreVmDatabase { #[instrument( level = "trace", - namespace = "block_execution", name = "Account read", - skip_all + skip_all, + fields(namespace = "block_execution") )] fn get_account_state(&self, address: Address) -> Result, EvmError> { self.store @@ -58,9 +58,9 @@ impl VmDatabase for StoreVmDatabase { #[instrument( level = "trace", - namespace = "block_execution", name = "Storage read", - skip_all + skip_all, + fields(namespace = "block_execution") )] fn get_storage_slot(&self, address: Address, key: H256) -> Result, EvmError> { self.store @@ -70,9 +70,9 @@ impl VmDatabase for StoreVmDatabase { #[instrument( level = "trace", - namespace = "block_execution", name = "Block hash read", - skip_all + skip_all, + fields(namespace = "block_execution") )] fn get_block_hash(&self, block_number: u64) -> Result { // Check if we have it cached @@ -120,9 +120,9 @@ impl VmDatabase for StoreVmDatabase { #[instrument( level = "trace", - namespace = "block_execution", name = "Account code read", - skip_all + skip_all, + fields(namespace = "block_execution") )] fn get_account_code(&self, code_hash: H256) -> Result { if code_hash == *EMPTY_KECCACK_HASH { diff --git a/crates/vm/backends/mod.rs b/crates/vm/backends/mod.rs index 52cdc2e97b1..8bbacc2aebd 100644 --- a/crates/vm/backends/mod.rs +++ b/crates/vm/backends/mod.rs @@ -79,9 +79,9 @@ impl Evm { #[instrument( level = "trace", - namespace = "block_execution", name = "Block execution", - skip_all + skip_all, + fields(namespace = "block_execution") )] pub fn execute_block_pipeline( &mut self, From 28ec41f8f5aa968d868de12127046bf16bdac162 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Wed, 12 Nov 2025 21:21:50 -0300 Subject: [PATCH 20/28] enhanced record_debug and remove magic namespacing given targets --- crates/blockchain/metrics/profiling.rs | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/crates/blockchain/metrics/profiling.rs b/crates/blockchain/metrics/profiling.rs index 992b689dec2..77320eadec3 100644 --- a/crates/blockchain/metrics/profiling.rs +++ b/crates/blockchain/metrics/profiling.rs @@ -46,9 +46,12 @@ impl Visit for NamespaceVisitor { fn record_debug(&mut self, field: &Field, value: &dyn std::fmt::Debug) { if field.name() == "namespace" { - self.namespace = Some(Cow::Owned( - format!("{value:?}").trim_matches('"').to_owned(), - )); + let rendered = format!("{value:?}"); + let cleaned = rendered + .strip_prefix('"') + .and_then(|s| s.strip_suffix('"')) + .unwrap_or(&rendered); + self.namespace = Some(Cow::Owned(cleaned.to_owned())); } } } @@ -81,17 +84,7 @@ where .extensions() .get::() .map(|ns| ns.0.clone()) - .unwrap_or_else(|| { - if target.contains("blockchain") { - Cow::Borrowed("block_processing") - } else if target.contains("storage") { - Cow::Borrowed("storage") - } else if target.contains("networking") { - Cow::Borrowed("networking") - } else { - Cow::Borrowed("other") - } - }); + .unwrap_or_else(|| { Cow::Borrowed("default") }); let function_name = span.metadata().name(); From 6e5b0419db220fe2489ab59c0b033a1fe81e5bac Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Thu, 13 Nov 2025 10:36:50 -0300 Subject: [PATCH 21/28] format --- crates/blockchain/metrics/profiling.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/blockchain/metrics/profiling.rs b/crates/blockchain/metrics/profiling.rs index 77320eadec3..8611ebbb6c5 100644 --- a/crates/blockchain/metrics/profiling.rs +++ b/crates/blockchain/metrics/profiling.rs @@ -84,7 +84,7 @@ where .extensions() .get::() .map(|ns| ns.0.clone()) - .unwrap_or_else(|| { Cow::Borrowed("default") }); + .unwrap_or_else(|| Cow::Borrowed("default")); let function_name = span.metadata().name(); From f6a401b142fded5c972fea18edc9828956c996d6 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Thu, 13 Nov 2025 14:54:05 -0300 Subject: [PATCH 22/28] apply suggestions Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- crates/blockchain/metrics/profiling.rs | 20 ++++++++++++++++++- .../common_dashboards/ethrex_l1_perf.json | 2 +- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/crates/blockchain/metrics/profiling.rs b/crates/blockchain/metrics/profiling.rs index 8611ebbb6c5..f3d83bfe316 100644 --- a/crates/blockchain/metrics/profiling.rs +++ b/crates/blockchain/metrics/profiling.rs @@ -107,7 +107,25 @@ where } } -/// Record the duration of an async operation under the shared function profiling histogram. +/// Records the duration of an async operation in the function profiling histogram. +/// +/// This provides a lightweight alternative to the `#[instrument]` attribute when you need +/// manual control over timing instrumentation, such as in RPC handlers. +/// +/// # Parameters +/// * `namespace` - Category for the metric (e.g., "rpc", "engine", "block_execution") +/// * `function_name` - Name identifier for the operation being timed +/// * `future` - The async operation to time +/// +/// # Example +/// ``` +/// let result = record_async_duration("rpc", "eth_getBalance", async { +/// // your async operation +/// }).await; +/// ``` +/// +/// Use this function when you need to instrument an async operation for duration metrics, +/// but cannot or do not want to use the `#[instrument]` attribute (for example, in RPC handlers). pub async fn record_async_duration(namespace: &str, function_name: &str, future: Fut) -> T where Fut: Future, diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json index 52a9be94b3a..158983ab403 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json @@ -1849,7 +1849,7 @@ "y": 28 }, "id": 112, - "interval": "10ss", + "interval": "10s", "options": { "legend": { "calcs": [ From cd1534d4332a9ae997a4dc33f155acd01a5a8857 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Thu, 13 Nov 2025 15:00:26 -0300 Subject: [PATCH 23/28] missed suggestions Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../grafana/dashboards/common_dashboards/ethrex_l1_perf.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json index 158983ab403..a33fc120808 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json @@ -1733,7 +1733,7 @@ "y": 28 }, "id": 115, - "interval": "10ss", + "interval": "10s", "options": { "displayMode": "gradient", "legend": { From 2948d975a449ab14b6c12bc3a49e2102c42677a4 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Thu, 13 Nov 2025 15:07:41 -0300 Subject: [PATCH 24/28] updated cargo lock for l2 quote-gen --- crates/l2/tee/quote-gen/Cargo.lock | 55 ++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/crates/l2/tee/quote-gen/Cargo.lock b/crates/l2/tee/quote-gen/Cargo.lock index 94a2dc28d8d..c6e9d3379c3 100644 --- a/crates/l2/tee/quote-gen/Cargo.lock +++ b/crates/l2/tee/quote-gen/Cargo.lock @@ -2301,10 +2301,14 @@ dependencies = [ name = "ethrex-metrics" version = "6.0.0" dependencies = [ + "axum", "ethrex-common", + "prometheus 0.13.4", "serde 1.0.228", "serde_json", "thiserror 2.0.16", + "tokio", + "tracing", "tracing-subscriber", ] @@ -2332,7 +2336,7 @@ dependencies = [ "hmac", "indexmap 2.11.4", "lazy_static", - "prometheus", + "prometheus 0.14.0", "rand 0.8.5", "rayon", "secp256k1", @@ -2374,6 +2378,7 @@ dependencies = [ "ethrex-blockchain", "ethrex-common", "ethrex-crypto", + "ethrex-metrics", "ethrex-p2p", "ethrex-rlp", "ethrex-storage", @@ -4693,6 +4698,46 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "procfs" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4" +dependencies = [ + "bitflags 2.9.4", + "hex", + "lazy_static", + "procfs-core", + "rustix 0.38.44", +] + +[[package]] +name = "procfs-core" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29" +dependencies = [ + "bitflags 2.9.4", + "hex", +] + +[[package]] +name = "prometheus" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d33c28a30771f7f96db69893f78b857f7450d7e0237e9c8fc6427a81bae7ed1" +dependencies = [ + "cfg-if 1.0.3", + "fnv", + "lazy_static", + "libc", + "memchr", + "parking_lot", + "procfs", + "protobuf 2.28.0", + "thiserror 1.0.69", +] + [[package]] name = "prometheus" version = "0.14.0" @@ -4704,7 +4749,7 @@ dependencies = [ "lazy_static", "memchr", "parking_lot", - "protobuf", + "protobuf 3.7.2", "thiserror 2.0.16", ] @@ -4724,6 +4769,12 @@ dependencies = [ "unarray", ] +[[package]] +name = "protobuf" +version = "2.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" + [[package]] name = "protobuf" version = "3.7.2" From 39bd789e9f677f8c00dd956f13a55c3853aa9159 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Thu, 13 Nov 2025 15:38:16 -0300 Subject: [PATCH 25/28] remove Cow given that it was already cloning --- crates/blockchain/metrics/profiling.rs | 27 ++++++++++---------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/crates/blockchain/metrics/profiling.rs b/crates/blockchain/metrics/profiling.rs index f3d83bfe316..35af4a4b267 100644 --- a/crates/blockchain/metrics/profiling.rs +++ b/crates/blockchain/metrics/profiling.rs @@ -1,5 +1,5 @@ use prometheus::{Encoder, HistogramTimer, HistogramVec, TextEncoder, register_histogram_vec}; -use std::{borrow::Cow, future::Future, sync::LazyLock}; +use std::{future::Future, sync::LazyLock}; use tracing::{ Subscriber, field::{Field, Visit}, @@ -30,17 +30,17 @@ pub struct FunctionProfilingLayer; struct ProfileTimer(HistogramTimer); /// Span extension storing the profiling namespace selected by instrumentation. -struct Namespace(Cow<'static, str>); +struct Namespace(String); #[derive(Default)] struct NamespaceVisitor { - namespace: Option>, + namespace: Option, } impl Visit for NamespaceVisitor { fn record_str(&mut self, field: &Field, value: &str) { if field.name() == "namespace" { - self.namespace = Some(Cow::Owned(value.to_owned())); + self.namespace = Some(value.to_owned()); } } @@ -51,7 +51,7 @@ impl Visit for NamespaceVisitor { .strip_prefix('"') .and_then(|s| s.strip_suffix('"')) .unwrap_or(&rendered); - self.namespace = Some(Cow::Owned(cleaned.to_owned())); + self.namespace = Some(cleaned.to_owned()); } } } @@ -80,16 +80,16 @@ where return; } - let namespace = span - .extensions() + let extensions = span.extensions(); + let namespace = extensions .get::() - .map(|ns| ns.0.clone()) - .unwrap_or_else(|| Cow::Borrowed("default")); + .map(|ns| ns.0.as_str()) + .unwrap_or("default"); let function_name = span.metadata().name(); let timer = METRICS_BLOCK_PROCESSING_PROFILE - .with_label_values(&[namespace.as_ref(), function_name]) + .with_label_values(&[namespace, function_name]) .start_timer(); // PERF: `extensions_mut` uses a Mutex internally (per span) span.extensions_mut().insert(ProfileTimer(timer)); @@ -117,13 +117,6 @@ where /// * `function_name` - Name identifier for the operation being timed /// * `future` - The async operation to time /// -/// # Example -/// ``` -/// let result = record_async_duration("rpc", "eth_getBalance", async { -/// // your async operation -/// }).await; -/// ``` -/// /// Use this function when you need to instrument an async operation for duration metrics, /// but cannot or do not want to use the `#[instrument]` attribute (for example, in RPC handlers). pub async fn record_async_duration(namespace: &str, function_name: &str, future: Fut) -> T From 5b167ab4b08554a35601d7ad6fdb613ad885ca06 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Thu, 13 Nov 2025 15:44:26 -0300 Subject: [PATCH 26/28] Add comment about why we use String instead of &'static str in the namespace --- crates/blockchain/metrics/profiling.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/blockchain/metrics/profiling.rs b/crates/blockchain/metrics/profiling.rs index 35af4a4b267..f0d02864aad 100644 --- a/crates/blockchain/metrics/profiling.rs +++ b/crates/blockchain/metrics/profiling.rs @@ -29,7 +29,9 @@ pub struct FunctionProfilingLayer; /// Wrapper around [`HistogramTimer`] to avoid conflicts with other layers struct ProfileTimer(HistogramTimer); -/// Span extension storing the profiling namespace selected by instrumentation. +/// Span extension storing the profiling namespace selected by instrumentation. This needs to +/// be a String instead of a &'static str because using the span macros we could recieve dynamically +/// generated names and can't rely on only string literals. struct Namespace(String); #[derive(Default)] From e890efa7680240326a8f9df7b0abecb4142f1fc7 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Thu, 13 Nov 2025 23:26:41 -0300 Subject: [PATCH 27/28] limit the life of exetension to the let timer block --- crates/blockchain/metrics/profiling.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/crates/blockchain/metrics/profiling.rs b/crates/blockchain/metrics/profiling.rs index f0d02864aad..dc746191f37 100644 --- a/crates/blockchain/metrics/profiling.rs +++ b/crates/blockchain/metrics/profiling.rs @@ -81,18 +81,20 @@ where if target.contains("::rpc") { return; } + let timer = { + let extensions = span.extensions(); + let namespace = extensions + .get::() + .map(|ns| ns.0.as_str()) + .unwrap_or("default"); - let extensions = span.extensions(); - let namespace = extensions - .get::() - .map(|ns| ns.0.as_str()) - .unwrap_or("default"); + let function_name = span.metadata().name(); - let function_name = span.metadata().name(); + METRICS_BLOCK_PROCESSING_PROFILE + .with_label_values(&[namespace, function_name]) + .start_timer() + }; - let timer = METRICS_BLOCK_PROCESSING_PROFILE - .with_label_values(&[namespace, function_name]) - .start_timer(); // PERF: `extensions_mut` uses a Mutex internally (per span) span.extensions_mut().insert(ProfileTimer(timer)); } From bd95d3113f53c341cc17abedad2c2e98ba8cdae6 Mon Sep 17 00:00:00 2001 From: Rodrigo Oliveri Date: Fri, 14 Nov 2025 09:48:59 -0300 Subject: [PATCH 28/28] modify 5m ranges to the actual range selected in some panels --- .../grafana/dashboards/common_dashboards/ethrex_l1_perf.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json index a33fc120808..b450de32bca 100644 --- a/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json +++ b/metrics/provisioning/grafana/dashboards/common_dashboards/ethrex_l1_perf.json @@ -1687,7 +1687,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (function_name) (\n rate(function_duration_seconds_count{\n job=\"$job\",\n instance=~\"$instance(:\\\\d+)?$\",\n namespace=\"engine\"\n }[5m])\n)", + "expr": "sum by (function_name) (\n rate(function_duration_seconds_count{\n job=\"$job\",\n instance=~\"$instance(:\\\\d+)?$\",\n namespace=\"engine\"\n }[$__range])\n)", "instant": false, "legendFormat": "{{function_name}}", "range": true, @@ -2059,7 +2059,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "rate(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m]) / rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[5m])", + "expr": "rate(function_duration_seconds_sum{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__range]) / rate(function_duration_seconds_count{job=\"$job\", instance=~\"$instance(:\\\\d+)?$\", namespace=\"rpc\"}[$__range])", "format": "table", "instant": true, "legendFormat": "__auto",