Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 156 additions & 1 deletion cmd/ethrex/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ use tokio_util::sync::CancellationToken;
use tracing::{Level, info, warn};

use crate::{
initializers::{get_network, init_blockchain, init_store, init_tracing, load_store},
initializers::{
get_network, init_blockchain, init_store, init_tracing, load_store, regenerate_head_state,
},
utils::{self, default_datadir, get_client_version, get_minimal_client_version, init_datadir},
};

Expand Down Expand Up @@ -349,6 +351,22 @@ pub enum Subcommand {
#[arg(long, action = ArgAction::SetTrue)]
l2: bool,
},
// Benchmark-oriented variant of the `import` subcommand: executes blocks
// one by one and reports performance metrics (see `import_blocks_bench`).
// Note: plain `//` comments are used deliberately — `///` doc comments would
// feed into clap's generated help text.
#[command(
name = "import-bench",
about = "Import blocks to the database for benchmarking"
)]
ImportBench {
#[arg(
required = true,
value_name = "FILE_PATH/FOLDER",
help = "Path to a RLP chain file or a folder containing files with individual Blocks"
)]
path: String,
// When set, wipes the existing database before importing.
#[arg(long = "removedb", action = ArgAction::SetTrue)]
removedb: bool,
// When set, runs with an L2 blockchain configuration instead of L1.
#[arg(long, action = ArgAction::SetTrue)]
l2: bool,
},
#[command(
name = "export",
about = "Export blocks in the current chain into a file in rlp encoding"
Expand Down Expand Up @@ -429,6 +447,31 @@ impl Subcommand {
)
.await?;
}
Subcommand::ImportBench { path, removedb, l2 } => {
    // TODO(review): consider merging this with the `Import` subcommand.
    if removedb {
        // Start from a clean database when requested.
        remove_db(&opts.datadir, opts.force);
    }
    info!("ethrex version: {}", get_client_version());

    let network = get_network(opts);
    let genesis = network.get_genesis()?;
    // Benchmarks can target either an L1 chain or a default-configured L2.
    let blockchain_type = if l2 {
        BlockchainType::L2(L2Config::default())
    } else {
        BlockchainType::L1
    };
    import_blocks_bench(
        &path,
        &opts.datadir,
        genesis,
        BlockchainOptions {
            r#type: blockchain_type,
            // Per-block perf logs are the whole point of a benchmark run.
            perf_logs_enabled: true,
            ..Default::default()
        },
    )
    .await?;
}
Subcommand::Export { path, first, last } => {
export_blocks(&path, &opts.datadir, first, last).await
}
Expand Down Expand Up @@ -624,6 +667,118 @@ pub async fn import_blocks(
Ok(())
}

/// Imports blocks from an RLP chain file, or from a directory of such files,
/// executing them one by one for benchmarking purposes.
///
/// Differences from `import_blocks`:
/// - the head state is regenerated up front so execution starts from a fully
///   materialized state (as a synced node would have), and
/// - a short sleep follows every block to let the background disk-write layer
///   settle, so reported numbers resemble a live node (see TODO below).
///
/// # Errors
/// Returns a `ChainError` if a block fails to execute or the final forkchoice
/// update fails. Filesystem problems panic via `expect`, since this is a CLI
/// entry point with no meaningful recovery.
pub async fn import_blocks_bench(
    path: &str,
    datadir: &Path,
    genesis: Genesis,
    blockchain_opts: BlockchainOptions,
) -> Result<(), ChainError> {
    let start_time = Instant::now();
    init_datadir(datadir);
    let store = init_store(datadir, genesis).await;
    let blockchain = init_blockchain(store.clone(), blockchain_opts);
    regenerate_head_state(&store, &blockchain)
        .await
        .expect("Failed to regenerate head state");
    let path_metadata = metadata(path).expect("Failed to read path");

    // If it's an .rlp file it will be just one chain, but if it's a directory there can be multiple chains.
    let chains: Vec<Vec<Block>> = if path_metadata.is_dir() {
        info!(path = %path, "Importing blocks from directory");
        let mut entries: Vec<_> = read_dir(path)
            .expect("Failed to read blocks directory")
            .map(|res| res.expect("Failed to open file in directory").path())
            .collect();

        // Sort entries to process files in order (e.g., 1.rlp, 2.rlp, ...)
        entries.sort();

        entries
            .iter()
            .map(|entry| {
                let path_str = entry.to_str().expect("Couldn't convert path to string");
                info!(path = %path_str, "Importing blocks from file");
                utils::read_chain_file(path_str)
            })
            .collect()
    } else {
        info!(path = %path, "Importing blocks from file");
        vec![utils::read_chain_file(path)]
    };

    let mut total_blocks_imported = 0;
    for blocks in chains {
        let size = blocks.len();
        // Collected up front so a single forkchoice update can be issued at the end.
        let mut numbers_and_hashes = blocks
            .iter()
            .map(|b| (b.header.number, b.hash()))
            .collect::<Vec<_>>();
        // Execute block by block
        let mut last_progress_log = Instant::now();
        for (index, block) in blocks.into_iter().enumerate() {
            let hash = block.hash();
            let number = block.header.number;

            // Log progress every 10 seconds
            if last_progress_log.elapsed() >= Duration::from_secs(10) {
                let processed = index + 1;
                // Percentage rounded to one decimal place.
                let percent = (((processed as f64 / size as f64) * 100.0) * 10.0).round() / 10.0;
                info!(processed, total = size, percent, "Import progress");
                last_progress_log = Instant::now();
            }

            // Check if the block is already in the blockchain, if it is do nothing, if not add it
            let block_number = store.get_block_number(hash).await.map_err(|_e| {
                ChainError::Custom(String::from(
                    "Couldn't check if block is already in the blockchain",
                ))
            })?;

            if block_number.is_some() {
                info!("Block {} is already in the blockchain", hash);
                continue;
            }

            blockchain
                .add_block_pipeline(block)
                .inspect_err(|err| match err {
                    // Block number 1's parent not found, the chain must not belong to the same network as the genesis file
                    ChainError::ParentNotFound if number == 1 => warn!("The chain file is not compatible with the genesis file. Are you sure you selected the correct network?"),
                    _ => warn!("Failed to add block {number} with hash {hash:#x}"),
                })?;

            // TODO: replace this
            // This sleep is because we have a background process writing to disk the last layer
            // And until it's done we can't execute the new block
            // Because this wants to compare against running a real node in terms of reported performance
            // It takes less than 500ms, so this is good enough, but we should report the performance
            // without taking into account that wait.
            tokio::time::sleep(Duration::from_millis(500)).await;
        }

        // Make head canonical and label all special blocks correctly.
        if let Some((head_number, head_hash)) = numbers_and_hashes.pop() {
            store
                .forkchoice_update(
                    Some(numbers_and_hashes),
                    head_number,
                    head_hash,
                    Some(head_number),
                    Some(head_number),
                )
                .await?;
        }

        total_blocks_imported += size;
    }

    let total_duration = start_time.elapsed();
    info!(
        blocks = total_blocks_imported,
        seconds = total_duration.as_secs_f64(),
        "Import completed"
    );
    Ok(())
}

pub async fn export_blocks(
path: &str,
datadir: &Path,
Expand Down
3 changes: 2 additions & 1 deletion docs/CLI.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Usage: ethrex [OPTIONS] [COMMAND]
Commands:
removedb Remove the database
import Import blocks to the database
import-bench Import blocks to the database for benchmarking
export Export blocks in the current chain into a file in rlp encoding
compute-state-root Compute the state root from a genesis file
help Print this message or the help of the given subcommand(s)
Expand Down Expand Up @@ -147,7 +148,7 @@ Block building options:
--builder.extra-data <EXTRA_DATA>
Block extra data message.

[default: "ethrex 5.0.0"]
[default: "ethrex 6.0.0"]

--builder.gas-limit <GAS_LIMIT>
Target block gas limit.
Expand Down
15 changes: 15 additions & 0 deletions tooling/import_benchmark/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
BENCH_ID ?= 1
NETWORK ?= hoodi

# Prints usage information for the benchmark targets.
help:
	@printf "run-bench: ## Runs a bench for the current pr. \nParameters:\n -BENCH_ID: number for the log file where it will\
be saved with the format bench-BENCH_ID.log\n -NETWORK: which network to access (hoodi, mainnet)\nRequirements:\n This tool assumes we are running on Linux,\
and that we have a valid db in ~/.local/share/ethrex_NETWORK_bench/ethrex\n with a valid state and a list of blocks for import\
in ~/.local/share/ethrex_NETWORK_bench/chain.rlp\n\n"
	@printf "python3 parse_bench.py bench_num_1 bench_num_2: ## Parses the bench log files from [bench_num_1 to bench_num_2) to find average ggas\nRequirements\n\
This script assumes we have the bench logs on the ethrex folder\n\n"

# Copies the pristine bench database into a scratch directory so every run
# starts from identical state, then imports and tees output to bench-$(BENCH_ID).log.
run-bench: ## Runs a bench for the current pr. parameters -BENCH_ID: number for the log file where it will be saved -NETWORK: which network to access
	rm -rf ~/.local/share/temp
	cp -r ~/.local/share/ethrex_$(NETWORK)_bench/ethrex ~/.local/share/temp
	cd ../.. && cargo r --release -- --network $(NETWORK) --datadir ~/.local/share/temp import-bench ~/.local/share/ethrex_$(NETWORK)_bench/chain.rlp | tee bench-$(BENCH_ID).log
56 changes: 56 additions & 0 deletions tooling/import_benchmark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Import Benchmark

## Why

This tool is used to benchmark the performance of **ethrex**.
We aim to execute the same set of blocks on the same hardware to ensure consistent
performance comparisons. Doing this on a running node is difficult because of variations
in hardware, peer count, block content, and system load.

To achieve consistent results, we run the same blocks multiple times on the same machine
using the `import-bench` subcommand.

## Setup

To run this benchmark, you will need:

- An **ethrex** database containing the blockchain state (required for realistic
database performance testing), located at:
`~/.local/share/ethrex_NETWORK_bench/ethrex`
- The database **must have completed snapshot generation** (`flatkeyvalue` generation).
*(On mainnet, this process takes about 8 hours.)*
- A `chain.rlp` file containing the blocks you want to test, located at:
`~/.local/share/ethrex_NETWORK_bench/chain.rlp`
- It is recommended that the file contains **at least 1,000 blocks**,
which can be generated using the `export` subcommand in ethrex.

### Recommended procedure

1. Run an ethrex node until it fully syncs and generates the snapshots.
2. Shut down the node and copy the database and the last block number.
3. Restart the node and let it advance by *X* additional blocks.
4. Stop the node again and run:
```bash
ethrex export --first <block_num> --last <block_num + X> ~/.local/share/ethrex_NETWORK_bench/chain.rlp
Copy link
Collaborator

@MegaRedHand MegaRedHand Nov 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated to this PR, but I got an error "Too many open files" when exporting the chain. We probably need to add the "ulimit fix" to the export command.

Edit: nvm, I also got the error when running make run-bench. We should add that fix when opening the DB. This can be changed in another PR.

```

## Run

The Makefile includes the following command:

```
run-bench: ## Runs a benchmark for the current PR.
```

Parameters:
- BENCH_ID: Identifier for the log file, saved as bench-BENCH_ID.log
- NETWORK: Network to access (e.g., hoodi, mainnet)


Example:
`make run-bench BENCH_ID=1 NETWORK=mainnet`

## View Output

You can view and compare benchmark results with:
`python3 parse_bench.py <bench_id_start> <bench_id_end>`

The range is half-open: it parses runs from `bench_id_start` up to, but not
including, `bench_id_end`. For example, `python3 parse_bench.py 10 13`
compares bench runs 10, 11, and 12.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Running this, I got a ModuleNotFoundError.

This comment was marked as resolved.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
`python3 parse_bench.py <bench_num_1> <bench_num_2>`
`python3 parse_bench.py <bench_id_start> <bench_id_end>`

We should also add an example, like we have in the PR description:

For example:

```text
# here we compare bench runs from 10 to 13 (non-inclusive)
$ python3 parse_bench.py 10 13
Blocks tested 1065
Mean ggas accross multiple runs: 0.14995899843505422
Mean ggas in run: 10 0.15196807511737107
Mean ggas in run: 11 0.15213427230046964
Mean ggas in run: 12 0.14577464788732383
Mean ggas spread across blocks: 0.004736776212832552
```

43 changes: 43 additions & 0 deletions tooling/import_benchmark/parse_bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Parses bench-<id>.log files produced by `make run-bench` and reports mean
# gigagas/s per run, across runs, and the per-block spread between runs.
#
# Usage: python3 parse_bench.py <bench_id_start> <bench_id_end>
# The range is half-open: [bench_id_start, bench_id_end).
import sys

# bench[block_num][run_id] -> ggas measured for that block in that run
bench = {}
# bench_around[run_id][block_num] -> the same data, grouped by run
bench_around = {}

start, end = sys.argv[-2:]
for i in range(int(start), int(end)):
    bench_around[i] = {}

    # Logs live two directories up (the ethrex repo root); read-only access.
    with open(f"../../bench-{i}.log", "r") as file:
        # Skip everything up to the state-regeneration marker so warm-up
        # work does not pollute the measurements.
        for line in file:
            if "Finished regenerating state" in line:
                break

        for line in file:
            if "[METRIC]" in line:
                # Positional extraction from the metric line: the block number
                # is the 7 chars before the first ')', the ggas value follows it.
                # NOTE(review): brittle — assumes fixed-width log fields; verify
                # against the current [METRIC] log format.
                block_num = line.split(")")[0][-7:]
                ggas = line.split(")")[1][2:7]

                if block_num not in bench:
                    bench[block_num] = {}
                bench[block_num][i] = float(ggas)
                bench_around[i][block_num] = float(ggas)

# Global mean over every (block, run) sample.
total = 0
count = 0
for block in bench.values():
    for ggas in block.values():
        total += ggas
        count += 1


print("Blocks tested", len(bench))
print("Mean ggas across multiple runs:", total / count)
for run_count, run in bench_around.items():
    print("Mean ggas in run:", run_count, sum(run.values()) / len(run.values()))

# Spread = max - min ggas per block across runs; mean taken over blocks
# (dividing by the sample count would understate the spread by a factor
# of the number of runs).
average_difference = []
for block_num, block in bench.items():
    average_difference.append(max(block.values()) - min(block.values()))
print("Mean ggas spread across blocks:", sum(average_difference) / len(average_difference))