Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 156 additions & 1 deletion cmd/ethrex/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ use tokio_util::sync::CancellationToken;
use tracing::{Level, info, warn};

use crate::{
initializers::{get_network, init_blockchain, init_store, init_tracing, load_store},
initializers::{
get_network, init_blockchain, init_store, init_tracing, load_store, regenerate_head_state,
},
utils::{self, default_datadir, get_client_version, get_minimal_client_version, init_datadir},
};

Expand Down Expand Up @@ -349,6 +351,22 @@ pub enum Subcommand {
#[arg(long, action = ArgAction::SetTrue)]
l2: bool,
},
// Benchmark-oriented variant of the `import` subcommand: executes blocks
// one by one and reports performance metrics (see `import_blocks_bench`).
// Note: plain `//` comments are used deliberately — `///` doc comments would
// feed into clap's generated help text.
#[command(
name = "import-bench",
about = "Import blocks to the database for benchmarking"
)]
ImportBench {
#[arg(
required = true,
value_name = "FILE_PATH/FOLDER",
help = "Path to a RLP chain file or a folder containing files with individual Blocks"
)]
path: String,
// When set, wipes the existing database before importing.
#[arg(long = "removedb", action = ArgAction::SetTrue)]
removedb: bool,
// When set, runs with an L2 blockchain configuration instead of L1.
#[arg(long, action = ArgAction::SetTrue)]
l2: bool,
},
#[command(
name = "export",
about = "Export blocks in the current chain into a file in rlp encoding"
Expand Down Expand Up @@ -429,6 +447,31 @@ impl Subcommand {
)
.await?;
}
Subcommand::ImportBench { path, removedb, l2 } => {
    // TODO(review): consider merging this with the `Import` subcommand.
    if removedb {
        // Start from a clean database when requested.
        remove_db(&opts.datadir, opts.force);
    }
    info!("ethrex version: {}", get_client_version());

    let network = get_network(opts);
    let genesis = network.get_genesis()?;
    // Benchmarks can target either an L1 chain or a default-configured L2.
    let blockchain_type = if l2 {
        BlockchainType::L2(L2Config::default())
    } else {
        BlockchainType::L1
    };
    import_blocks_bench(
        &path,
        &opts.datadir,
        genesis,
        BlockchainOptions {
            r#type: blockchain_type,
            // Per-block perf logs are the whole point of a benchmark run.
            perf_logs_enabled: true,
            ..Default::default()
        },
    )
    .await?;
}
Subcommand::Export { path, first, last } => {
export_blocks(&path, &opts.datadir, first, last).await
}
Expand Down Expand Up @@ -624,6 +667,118 @@ pub async fn import_blocks(
Ok(())
}

/// Imports blocks from an RLP chain file, or from a directory of such files,
/// executing them one by one for benchmarking purposes.
///
/// Differences from `import_blocks`:
/// - the head state is regenerated up front so execution starts from a fully
///   materialized state (as a synced node would have), and
/// - a short sleep follows every block to let the background disk-write layer
///   settle, so reported numbers resemble a live node (see TODO below).
///
/// # Errors
/// Returns a `ChainError` if a block fails to execute or the final forkchoice
/// update fails. Filesystem problems panic via `expect`, since this is a CLI
/// entry point with no meaningful recovery.
pub async fn import_blocks_bench(
    path: &str,
    datadir: &Path,
    genesis: Genesis,
    blockchain_opts: BlockchainOptions,
) -> Result<(), ChainError> {
    let start_time = Instant::now();
    init_datadir(datadir);
    let store = init_store(datadir, genesis).await;
    let blockchain = init_blockchain(store.clone(), blockchain_opts);
    regenerate_head_state(&store, &blockchain)
        .await
        .expect("Failed to regenerate head state");
    let path_metadata = metadata(path).expect("Failed to read path");

    // If it's an .rlp file it will be just one chain, but if it's a directory there can be multiple chains.
    let chains: Vec<Vec<Block>> = if path_metadata.is_dir() {
        info!(path = %path, "Importing blocks from directory");
        let mut entries: Vec<_> = read_dir(path)
            .expect("Failed to read blocks directory")
            .map(|res| res.expect("Failed to open file in directory").path())
            .collect();

        // Sort entries to process files in order (e.g., 1.rlp, 2.rlp, ...)
        entries.sort();

        entries
            .iter()
            .map(|entry| {
                let path_str = entry.to_str().expect("Couldn't convert path to string");
                info!(path = %path_str, "Importing blocks from file");
                utils::read_chain_file(path_str)
            })
            .collect()
    } else {
        info!(path = %path, "Importing blocks from file");
        vec![utils::read_chain_file(path)]
    };

    let mut total_blocks_imported = 0;
    for blocks in chains {
        let size = blocks.len();
        // Collected up front so a single forkchoice update can be issued at the end.
        let mut numbers_and_hashes = blocks
            .iter()
            .map(|b| (b.header.number, b.hash()))
            .collect::<Vec<_>>();
        // Execute block by block
        let mut last_progress_log = Instant::now();
        for (index, block) in blocks.into_iter().enumerate() {
            let hash = block.hash();
            let number = block.header.number;

            // Log progress every 10 seconds
            if last_progress_log.elapsed() >= Duration::from_secs(10) {
                let processed = index + 1;
                // Percentage rounded to one decimal place.
                let percent = (((processed as f64 / size as f64) * 100.0) * 10.0).round() / 10.0;
                info!(processed, total = size, percent, "Import progress");
                last_progress_log = Instant::now();
            }

            // Check if the block is already in the blockchain, if it is do nothing, if not add it
            let block_number = store.get_block_number(hash).await.map_err(|_e| {
                ChainError::Custom(String::from(
                    "Couldn't check if block is already in the blockchain",
                ))
            })?;

            if block_number.is_some() {
                info!("Block {} is already in the blockchain", hash);
                continue;
            }

            blockchain
                .add_block_pipeline(block)
                .inspect_err(|err| match err {
                    // Block number 1's parent not found, the chain must not belong to the same network as the genesis file
                    ChainError::ParentNotFound if number == 1 => warn!("The chain file is not compatible with the genesis file. Are you sure you selected the correct network?"),
                    _ => warn!("Failed to add block {number} with hash {hash:#x}"),
                })?;

            // TODO: replace this
            // This sleep is because we have a background process writing to disk the last layer
            // And until it's done we can't execute the new block
            // Because this wants to compare against running a real node in terms of reported performance
            // It takes less than 500ms, so this is good enough, but we should report the performance
            // without taking into account that wait.
            tokio::time::sleep(Duration::from_millis(500)).await;
        }

        // Make head canonical and label all special blocks correctly.
        if let Some((head_number, head_hash)) = numbers_and_hashes.pop() {
            store
                .forkchoice_update(
                    Some(numbers_and_hashes),
                    head_number,
                    head_hash,
                    Some(head_number),
                    Some(head_number),
                )
                .await?;
        }

        total_blocks_imported += size;
    }

    let total_duration = start_time.elapsed();
    info!(
        blocks = total_blocks_imported,
        seconds = total_duration.as_secs_f64(),
        "Import completed"
    );
    Ok(())
}

pub async fn export_blocks(
path: &str,
datadir: &Path,
Expand Down
3 changes: 2 additions & 1 deletion docs/CLI.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Usage: ethrex [OPTIONS] [COMMAND]
Commands:
removedb Remove the database
import Import blocks to the database
import-bench Import blocks to the database for benchmarking
export Export blocks in the current chain into a file in rlp encoding
compute-state-root Compute the state root from a genesis file
help Print this message or the help of the given subcommand(s)
Expand Down Expand Up @@ -147,7 +148,7 @@ Block building options:
--builder.extra-data <EXTRA_DATA>
Block extra data message.

[default: "ethrex 5.0.0"]
[default: "ethrex 6.0.0"]

--builder.gas-limit <GAS_LIMIT>
Target block gas limit.
Expand Down
15 changes: 15 additions & 0 deletions tooling/import_benchmark/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
BENCH_ID ?= 1
NETWORK ?= hoodi

# Prints usage information for the benchmark targets.
help:
	@printf "run-bench: ## Runs a bench for the current pr. \nParameters:\n -BENCH_ID: number for the log file where it will\
be saved with the format bench-BENCH_ID.log\n -NETWORK: which network to access (hoodi, mainnet)\nRequirements:\n This tool assumes we are running on Linux,\
and that we have a valid db in ~/.local/share/ethrex_NETWORK_bench/ethrex\n with a valid state and a list of blocks for import\
in ~/.local/share/ethrex_NETWORK_bench/chain.rlp\n\n"
	@printf "python3 parse_bench.py bench_num_1 bench_num_2: ## Parses the bench log files from [bench_num_1 to bench_num_2) to find average ggas\nRequirements\n\
This script assumes we have the bench logs on the ethrex folder\n\n"

# Copies the pristine bench database into a scratch directory so every run
# starts from identical state, then imports and tees output to bench-$(BENCH_ID).log.
run-bench: ## Runs a bench for the current pr. parameters -BENCH_ID: number for the log file where it will be saved -NETWORK: which network to access
	rm -rf ~/.local/share/temp
	cp -r ~/.local/share/ethrex_$(NETWORK)_bench/ethrex ~/.local/share/temp
	cd ../.. && cargo r --release -- --network $(NETWORK) --datadir ~/.local/share/temp import-bench ~/.local/share/ethrex_$(NETWORK)_bench/chain.rlp | tee bench-$(BENCH_ID).log
56 changes: 56 additions & 0 deletions tooling/import_benchmark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Import Benchmark

## Why

This tool is used to benchmark the performance of **ethrex**.
We aim to execute the same set of blocks on the same hardware to ensure consistent
performance comparisons. Doing this on a running node is difficult because of variations
in hardware, peer count, block content, and system load.

To achieve consistent results, we run the same blocks multiple times on the same machine
using the `import-bench` subcommand.

## Setup

To run this benchmark, you will need:

- An **ethrex** database containing the blockchain state (required for realistic
database performance testing), located at:
`~/.local/share/ethrex_NETWORK_bench/ethrex`
- The database **must have completed snapshot generation** (`flatkeyvalue` generation).
*(On mainnet, this process takes about 8 hours.)*
- A `chain.rlp` file containing the blocks you want to test, located at:
`~/.local/share/ethrex_NETWORK_bench/chain.rlp`
- It is recommended that the file contains **at least 1,000 blocks**,
which can be generated using the `export` subcommand in ethrex.

### Recommended procedure

1. Run an ethrex node until it fully syncs and generates the snapshots.
2. Shut down the node and copy the database and the last block number.
3. Restart the node and let it advance by *X* additional blocks.
4. Stop the node again and run:
```bash
ethrex export --first <block_num> --last <block_num + X> ~/.local/share/ethrex_NETWORK_bench/chain.rlp
Copy link
Collaborator

@MegaRedHand MegaRedHand Nov 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated to this PR, but I got an error "Too many open files" when exporting the chain. We probably need to add the "ulimit fix" to the export command.

Edit: nvm, I also got the error when running make run-bench. We should add that fix when opening the DB. This can be changed in another PR.

```

## Run

The Makefile includes the following command:

```
run-bench: ## Runs a benchmark for the current PR.
```

Parameters:
- BENCH_ID: Identifier for the log file, saved as bench-BENCH_ID.log
- NETWORK: Network to access (e.g., hoodi, mainnet)


Example:
`make run-bench BENCH_ID=1 NETWORK=mainnet`

## View Output

You can view and compare benchmark results with:
`python3 parse_bench.py <bench_id_start> <bench_id_end>`

The range is half-open: it parses runs from `bench_id_start` up to, but not
including, `bench_id_end`. For example, `python3 parse_bench.py 10 13`
compares bench runs 10, 11, and 12.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Running this, I got a ModuleNotFoundError.

This comment was marked as resolved.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
`python3 parse_bench.py <bench_num_1> <bench_num_2>`
`python3 parse_bench.py <bench_id_start> <bench_id_end>`

We should also add an example, like we have in the PR description:

For example:

```text
# here we compare bench runs from 10 to 13 (non-inclusive)
$ python3 parse_bench.py 10 13
Blocks tested 1065
Mean ggas accross multiple runs: 0.14995899843505422
Mean ggas in run: 10 0.15196807511737107
Mean ggas in run: 11 0.15213427230046964
Mean ggas in run: 12 0.14577464788732383
Mean ggas spread across blocks: 0.004736776212832552
```

43 changes: 43 additions & 0 deletions tooling/import_benchmark/parse_bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Parses bench-<id>.log files produced by `make run-bench` and reports mean
# gigagas/s per run, across runs, and the per-block spread between runs.
#
# Usage: python3 parse_bench.py <bench_id_start> <bench_id_end>
# The range is half-open: [bench_id_start, bench_id_end).
import sys

# bench[block_num][run_id] -> ggas measured for that block in that run
bench = {}
# bench_around[run_id][block_num] -> the same data, grouped by run
bench_around = {}

start, end = sys.argv[-2:]
for i in range(int(start), int(end)):
    bench_around[i] = {}

    # Logs live two directories up (the ethrex repo root); read-only access.
    with open(f"../../bench-{i}.log", "r") as file:
        # Skip everything up to the state-regeneration marker so warm-up
        # work does not pollute the measurements.
        for line in file:
            if "Finished regenerating state" in line:
                break

        for line in file:
            if "[METRIC]" in line:
                # Positional extraction from the metric line: the block number
                # is the 7 chars before the first ')', the ggas value follows it.
                # NOTE(review): brittle — assumes fixed-width log fields; verify
                # against the current [METRIC] log format.
                block_num = line.split(")")[0][-7:]
                ggas = line.split(")")[1][2:7]

                if block_num not in bench:
                    bench[block_num] = {}
                bench[block_num][i] = float(ggas)
                bench_around[i][block_num] = float(ggas)

# Global mean over every (block, run) sample.
total = 0
count = 0
for block in bench.values():
    for ggas in block.values():
        total += ggas
        count += 1


print("Blocks tested", len(bench))
print("Mean ggas across multiple runs:", total / count)
for run_count, run in bench_around.items():
    print("Mean ggas in run:", run_count, sum(run.values()) / len(run.values()))

# Spread = max - min ggas per block across runs; mean taken over blocks
# (dividing by the sample count would understate the spread by a factor
# of the number of runs).
average_difference = []
for block_num, block in bench.items():
    average_difference.append(max(block.values()) - min(block.values()))
print("Mean ggas spread across blocks:", sum(average_difference) / len(average_difference))