diff --git a/cmd/ethrex/cli.rs b/cmd/ethrex/cli.rs
index 656bbd2198f..4f153870b20 100644
--- a/cmd/ethrex/cli.rs
+++ b/cmd/ethrex/cli.rs
@@ -21,7 +21,9 @@ use tokio_util::sync::CancellationToken;
 use tracing::{Level, info, warn};
 
 use crate::{
-    initializers::{get_network, init_blockchain, init_store, init_tracing, load_store},
+    initializers::{
+        get_network, init_blockchain, init_store, init_tracing, load_store, regenerate_head_state,
+    },
     utils::{self, default_datadir, get_client_version, get_minimal_client_version, init_datadir},
 };
 
@@ -349,6 +351,22 @@ pub enum Subcommand {
         #[arg(long, action = ArgAction::SetTrue)]
         l2: bool,
     },
+    #[command(
+        name = "import-bench",
+        about = "Import blocks to the database for benchmarking"
+    )]
+    ImportBench {
+        #[arg(
+            required = true,
+            value_name = "FILE_PATH/FOLDER",
+            help = "Path to a RLP chain file or a folder containing files with individual Blocks"
+        )]
+        path: String,
+        #[arg(long = "removedb", action = ArgAction::SetTrue)]
+        removedb: bool,
+        #[arg(long, action = ArgAction::SetTrue)]
+        l2: bool,
+    },
     #[command(
         name = "export",
         about = "Export blocks in the current chain into a file in rlp encoding"
@@ -429,6 +447,31 @@ impl Subcommand {
                 )
                 .await?;
             }
+            Subcommand::ImportBench { path, removedb, l2 } => {
+                if removedb {
+                    remove_db(&opts.datadir.clone(), opts.force);
+                }
+                info!("ethrex version: {}", get_client_version());
+
+                let network = get_network(opts);
+                let genesis = network.get_genesis()?;
+                let blockchain_type = if l2 {
+                    BlockchainType::L2(L2Config::default())
+                } else {
+                    BlockchainType::L1
+                };
+                import_blocks_bench(
+                    &path,
+                    &opts.datadir,
+                    genesis,
+                    BlockchainOptions {
+                        r#type: blockchain_type,
+                        perf_logs_enabled: true,
+                        ..Default::default()
+                    },
+                )
+                .await?;
+            }
             Subcommand::Export { path, first, last } => {
                 export_blocks(&path, &opts.datadir, first, last).await
             }
@@ -624,6 +667,118 @@ pub async fn import_blocks(
     Ok(())
 }
 
+pub async fn import_blocks_bench(
+    path: &str,
+    datadir: &Path,
+    genesis: Genesis,
+    blockchain_opts: BlockchainOptions,
+) -> Result<(), ChainError> {
+    let start_time = Instant::now();
+    init_datadir(datadir);
+    let store = init_store(datadir, genesis).await;
+    let blockchain = init_blockchain(store.clone(), blockchain_opts);
+    regenerate_head_state(&store, &blockchain).await.unwrap();
+    let path_metadata = metadata(path).expect("Failed to read path");
+
+    // If it's an .rlp file it will be just one chain, but if it's a directory there can be multiple chains.
+    let chains: Vec<Vec<Block>> = if path_metadata.is_dir() {
+        info!(path = %path, "Importing blocks from directory");
+        let mut entries: Vec<_> = read_dir(path)
+            .expect("Failed to read blocks directory")
+            .map(|res| res.expect("Failed to open file in directory").path())
+            .collect();
+
+        // Sort entries to process files in order (e.g., 1.rlp, 2.rlp, ...)
+        entries.sort();
+
+        entries
+            .iter()
+            .map(|entry| {
+                let path_str = entry.to_str().expect("Couldn't convert path to string");
+                info!(path = %path_str, "Importing blocks from file");
+                utils::read_chain_file(path_str)
+            })
+            .collect()
+    } else {
+        info!(path = %path, "Importing blocks from file");
+        vec![utils::read_chain_file(path)]
+    };
+
+    let mut total_blocks_imported = 0;
+    for blocks in chains {
+        let size = blocks.len();
+        let mut numbers_and_hashes = blocks
+            .iter()
+            .map(|b| (b.header.number, b.hash()))
+            .collect::<Vec<_>>();
+        // Execute block by block
+        let mut last_progress_log = Instant::now();
+        for (index, block) in blocks.into_iter().enumerate() {
+            let hash = block.hash();
+            let number = block.header.number;
+
+            // Log progress every 10 seconds
+            if last_progress_log.elapsed() >= Duration::from_secs(10) {
+                let processed = index + 1;
+                let percent = (((processed as f64 / size as f64) * 100.0) * 10.0).round() / 10.0;
+                info!(processed, total = size, percent, "Import progress");
+                last_progress_log = Instant::now();
+            }
+
+            // Check if the block is already in the blockchain; if it is, skip it, otherwise add it
+            let block_number = store.get_block_number(hash).await.map_err(|_e| {
+                ChainError::Custom(String::from(
+                    "Couldn't check if block is already in the blockchain",
+                ))
+            })?;
+
+            if block_number.is_some() {
+                info!("Block {} is already in the blockchain", block.hash());
+                continue;
+            }
+
+            blockchain
+                .add_block_pipeline(block)
+                .inspect_err(|err| match err {
+                    // Block number 1's parent not found: the chain must not belong to the same network as the genesis file
+                    ChainError::ParentNotFound if number == 1 => warn!("The chain file is not compatible with the genesis file. Are you sure you selected the correct network?"),
+                    _ => warn!("Failed to add block {number} with hash {hash:#x}"),
+                })?;
+
+            // TODO: replace this
+            // This sleep is here because a background process is writing the last layer to disk,
+            // and we can't execute the new block until it's done.
+            // Since this benchmark wants to be comparable with a real running node in terms of
+            // reported performance, and the wait takes less than 500ms, this is good enough,
+            // but we should report the performance without taking that wait into account.
+            tokio::time::sleep(Duration::from_millis(500)).await;
+        }
+
+        // Make head canonical and label all special blocks correctly.
+        if let Some((head_number, head_hash)) = numbers_and_hashes.pop() {
+            store
+                .forkchoice_update(
+                    Some(numbers_and_hashes),
+                    head_number,
+                    head_hash,
+                    Some(head_number),
+                    Some(head_number),
+                )
+                .await?;
+        }
+
+        total_blocks_imported += size;
+    }
+
+    let total_duration = start_time.elapsed();
+    info!(
+        blocks = total_blocks_imported,
+        seconds = total_duration.as_secs_f64(),
+        "Import completed"
+    );
+    Ok(())
+}
+
 pub async fn export_blocks(
     path: &str,
     datadir: &Path,
diff --git a/docs/CLI.md b/docs/CLI.md
index 1b8ae492669..4f70b57c1ab 100644
--- a/docs/CLI.md
+++ b/docs/CLI.md
@@ -12,6 +12,7 @@ Usage: ethrex [OPTIONS] [COMMAND]
 Commands:
   removedb            Remove the database
   import              Import blocks to the database
+  import-bench        Import blocks to the database for benchmarking
   export              Export blocks in the current chain into a file in rlp encoding
   compute-state-root  Compute the state root from a genesis file
   help                Print this message or the help of the given subcommand(s)
@@ -147,7 +148,7 @@ Block building options:
       --builder.extra-data
           Block extra data message.
 
-          [default: "ethrex 5.0.0"]
+          [default: "ethrex 6.0.0"]
 
       --builder.gas-limit
          Target block gas limit.
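
For reviewers who want to exercise the new subcommand directly (outside the Makefile added below), an invocation along these lines should work. This is a sketch: the `--datadir` copy and the `chain.rlp` path are placeholders for wherever your pre-synced state and exported blocks live, and it mirrors the `run-bench` target in the new Makefile.

```bash
# Assumes ~/.local/share/temp is a fresh copy of a pre-synced ethrex datadir.
# Perf logs are teed to a file that parse_bench.py can aggregate afterwards.
cargo run --release -- \
    --network hoodi \
    --datadir ~/.local/share/temp \
    import-bench ~/.local/share/ethrex_hoodi_bench/chain.rlp | tee bench-1.log
```
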
diff --git a/tooling/import_benchmark/Makefile b/tooling/import_benchmark/Makefile
new file mode 100644
index 00000000000..e0c8915ac6c
--- /dev/null
+++ b/tooling/import_benchmark/Makefile
@@ -0,0 +1,15 @@
+BENCH_ID ?= 1
+NETWORK ?= hoodi
+
+help:
+	@printf "run-bench: ## Runs a bench for the current PR.\nParameters:\n  -BENCH_ID: number for the log file where it will\
+	be saved with the format bench-BENCH_ID.log\n  -NETWORK: which network to access (hoodi, mainnet)\nRequirements:\n  This tool assumes we are running on Linux,\
+	and that we have a valid db in ~/.local/share/ethrex_NETWORK_bench/ethrex\n  with a valid state and a list of blocks for import\
+	in ~/.local/share/ethrex_NETWORK_bench/chain.rlp\n\n"
+	@printf "python3 parse_bench.py bench_num_1 bench_num_2: ## Parses the bench log files from [bench_num_1 to bench_num_2) to find the average ggas\nRequirements:\n\
+	This script assumes the bench logs are in the ethrex folder\n\n"
+
+run-bench: ## Runs a bench for the current PR. Parameters: -BENCH_ID: number for the log file where it will be saved -NETWORK: which network to access
+	rm -rf ~/.local/share/temp
+	cp -r ~/.local/share/ethrex_$(NETWORK)_bench/ethrex ~/.local/share/temp
+	cd ../.. && cargo r --release -- --network $(NETWORK) --datadir ~/.local/share/temp import-bench ~/.local/share/ethrex_$(NETWORK)_bench/chain.rlp | tee bench-$(BENCH_ID).log
diff --git a/tooling/import_benchmark/README.md b/tooling/import_benchmark/README.md
new file mode 100644
index 00000000000..a5b8e7aef12
--- /dev/null
+++ b/tooling/import_benchmark/README.md
@@ -0,0 +1,56 @@
+# Import Benchmark
+
+## Why
+
+This tool is used to benchmark the performance of **ethrex**.
+We aim to execute the same set of blocks on the same hardware to ensure consistent
+performance comparisons. Doing this on a running node is difficult because of variations
+in hardware, peer count, block content, and system load.
+
+To achieve consistent results, we run the same blocks multiple times on the same machine
+using the `import-bench` subcommand.
+
+## Setup
+
+To run this benchmark, you will need:
+
+- An **ethrex** database containing the blockchain state (required for realistic
+  database performance testing), located at:
+  `~/.local/share/ethrex_NETWORK_bench/ethrex`
+- The database **must have completed snapshot generation** (`flatkeyvalue` generation).
+  *(On mainnet, this process takes about 8 hours.)*
+- A `chain.rlp` file containing the blocks you want to test, located at:
+  `~/.local/share/ethrex_NETWORK_bench/chain.rlp`
+- It is recommended that the file contains **at least 1,000 blocks**,
+  which can be generated using the `export` subcommand in ethrex.
+
+### Recommended procedure
+
+1. Run an ethrex node until it fully syncs and generates the snapshots.
+2. Shut down the node, copy the database, and record the last block number.
+3. Restart the node and let it advance by *X* additional blocks.
+4. Stop the node again and run:
+   ```bash
+   ethrex export --first <first_block> --last <last_block> ~/.local/share/ethrex_NETWORK_bench/chain.rlp
+   ```
+
+## Run
+
+The Makefile includes the following command:
+
+```
+run-bench: ## Runs a benchmark for the current PR.
+```
+
+Parameters:
+ - BENCH_ID: Identifier for the log file, saved as bench-BENCH_ID.log
+ - NETWORK: Network to access (e.g., hoodi, mainnet)
+
+
+Example:
+`make run-bench BENCH_ID=1 NETWORK=mainnet`
+
+## View Output
+
+You can view and compare benchmark results with:
+`python3 parse_bench.py <bench_num_1> <bench_num_2>`
diff --git a/tooling/import_benchmark/parse_bench.py b/tooling/import_benchmark/parse_bench.py
new file mode 100644
index 00000000000..b5dde25695d
--- /dev/null
+++ b/tooling/import_benchmark/parse_bench.py
@@ -0,0 +1,43 @@
+import sys
+
+bench = {}
+bench_around = {}
+
+start, end = sys.argv[-2:]
+for i in range(int(start), int(end)):
+    bench_around[i] = {}
+
+    with open(f"../../bench-{i}.log", "r") as file:
+        # Skip everything up to the end of state regeneration; only the block
+        # execution metrics after that point are relevant.
+        for line in file:
+            if "Finished regenerating state" in line:
+                break
+
+        for line in file:
+            if "[METRIC]" in line:
+                # Extract the block number and the Ggas/s figure from the metric line
+                # (fixed-width slices that rely on ethrex's log layout).
+                block_num = line.split(")")[0][-7:]
+                ggas = line.split(")")[1][2:7]
+
+                if block_num not in bench:
+                    bench[block_num] = {}
+                bench[block_num][i] = float(ggas)
+                bench_around[i][block_num] = float(ggas)
+
+total = 0
+count = 0
+for block in bench.values():
+    for ggas in block.values():
+        total += ggas
+        count += 1
+
+
+print("Blocks tested:", len(bench))
+print("Mean ggas across multiple runs:", total / count)
+for run_count, run in bench_around.items():
+    print("Mean ggas in run:", run_count, sum(run.values()) / len(run.values()))
+
+average_difference = []
+for block in bench.values():
+    average_difference.append(max(block.values()) - min(block.values()))
+print("Mean ggas spread across blocks:", sum(average_difference) / len(average_difference))
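
Putting the pieces together, the end-to-end workflow described by the README and Makefile looks roughly like the sketch below. It assumes a datadir that has finished snapshot generation at `~/.local/share/ethrex_mainnet_bench/ethrex`; `<first_block>` and `<last_block>` are placeholders for the block range exported after letting the synced node advance.

```bash
# 1. Export the blocks to replay, once the node has advanced past the saved state.
ethrex export --first <first_block> --last <last_block> \
    ~/.local/share/ethrex_mainnet_bench/chain.rlp

# 2. Run the benchmark a few times; run-bench restores the pristine datadir each time.
cd tooling/import_benchmark
make run-bench BENCH_ID=1 NETWORK=mainnet
make run-bench BENCH_ID=2 NETWORK=mainnet

# 3. Aggregate the Ggas/s numbers from bench-1.log and bench-2.log
#    (the range is half-open, so "1 3" covers runs 1 and 2).
python3 parse_bench.py 1 3
```
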