From a503cdb3b16d6ee9be4217ffc001002ec4ed91a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Thu, 25 Sep 2025 16:24:45 -0300 Subject: [PATCH 01/50] prealloc --- crates/common/rlp/structs.rs | 10 ++++++++++ crates/common/trie/node/branch.rs | 8 ++++++-- crates/common/trie/node/extension.rs | 7 +++++-- crates/common/trie/node/leaf.rs | 8 ++++++-- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/crates/common/rlp/structs.rs b/crates/common/rlp/structs.rs index d493389c8f1..68a31ccd30b 100644 --- a/crates/common/rlp/structs.rs +++ b/crates/common/rlp/structs.rs @@ -183,6 +183,16 @@ impl<'a> Encoder<'a> { } } + /// Creates a new encoder that writes to the given buffer. + /// + /// Preallocates the temp buffer for `capacity` bytes. + pub fn new_with_capacity(buf: &'a mut dyn BufMut, capacity: usize) -> Self { + Self { + buf, + temp_buf: Vec::with_capacity(capacity), + } + } + /// Stores a field to be encoded. pub fn encode_field(mut self, value: &T) -> Self { ::encode(value, &mut self.temp_buf); diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index edcf0da1746..51b5be29e06 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -208,8 +208,12 @@ impl BranchNode { /// Encodes the node pub fn encode_raw(&self) -> Vec { - let mut buf = vec![]; - let mut encoder = Encoder::new(&mut buf); + // 16 items * 32 bytes, assuming branches don't have values + // in a state or storage trie + const RLP_ENCODED_SIZE: usize = 512; + + let mut buf = Vec::with_capacity(RLP_ENCODED_SIZE); + let mut encoder = Encoder::new_with_capacity(&mut buf, RLP_ENCODED_SIZE); for child in self.choices.iter() { match child.compute_hash() { NodeHash::Hashed(hash) => encoder = encoder.encode_bytes(&hash.0), diff --git a/crates/common/trie/node/extension.rs b/crates/common/trie/node/extension.rs index 9878a6f5b47..74abfa6a921 100644 --- a/crates/common/trie/node/extension.rs +++ b/crates/common/trie/node/extension.rs @@ -149,8 +149,11 @@ impl ExtensionNode { /// Encodes the node pub fn encode_raw(&self) -> Vec { - let mut buf = vec![]; - let mut encoder = Encoder::new(&mut buf).encode_bytes(&self.prefix.encode_compact()); + // (2 items) * 32 bytes, prefix and child (hash) + const RLP_ENCODED_SIZE: usize = 64; + let mut buf = Vec::with_capacity(RLP_ENCODED_SIZE); + let mut encoder = Encoder::new_with_capacity(&mut buf, RLP_ENCODED_SIZE) + .encode_bytes(&self.prefix.encode_compact()); encoder = self.child.compute_hash().encode(encoder); encoder.finish(); buf diff --git a/crates/common/trie/node/leaf.rs b/crates/common/trie/node/leaf.rs index dce9a34416e..8629fc90f70 100644 --- a/crates/common/trie/node/leaf.rs +++ b/crates/common/trie/node/leaf.rs @@ -119,8 +119,12 @@ impl LeafNode { /// Encodes the node pub fn encode_raw(&self) -> Vec { - let mut buf = vec![]; - Encoder::new(&mut buf) + // (1 items + 4 items) * 32 bytes, assuming worst case in which it contains an + // ethereum account (4 item RLP) and a 32 byte partial + const RLP_ENCODED_SIZE: usize = 160; + + let mut buf = Vec::with_capacity(RLP_ENCODED_SIZE); + Encoder::new_with_capacity(&mut buf, RLP_ENCODED_SIZE) .encode_bytes(&self.partial.encode_compact()) .encode_bytes(&self.value) .finish(); From cc65ec94e1da19860a07290851194d430367e3ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Thu, 25 Sep 2025 18:04:36 -0300 Subject: [PATCH 02/50] fast rlp encoding for branch --- crates/common/trie/node/branch.rs | 53 ++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 51b5be29e06..5dedf451fc1 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -207,24 +207,55 @@ impl BranchNode { } /// Encodes the node + /// Assumptions: + /// - No value + /// - 32 byte choices pub fn encode_raw(&self) -> Vec { - // 16 items * 32 bytes, assuming branches don't have values - // in a state or storage trie - const RLP_ENCODED_SIZE: usize = 512; + // 16 items * 33 bytes, assuming branches don't have values + // in a state or storage trie. + // plus a 3 byte headroom for the first prefix and possibly payload len + const MAX_RLP_ENCODED_SIZE: usize = 528 + 3; - let mut buf = Vec::with_capacity(RLP_ENCODED_SIZE); - let mut encoder = Encoder::new_with_capacity(&mut buf, RLP_ENCODED_SIZE); + let mut buf = Vec::with_capacity(MAX_RLP_ENCODED_SIZE); + buf.extend([0x00; 3]); + + let mut payload_len = 1; for child in self.choices.iter() { match child.compute_hash() { - NodeHash::Hashed(hash) => encoder = encoder.encode_bytes(&hash.0), - child @ NodeHash::Inline(raw) if raw.1 != 0 => { - encoder = encoder.encode_raw(child.as_ref()) + NodeHash::Hashed(hash) => { + buf.push(0xa0); + buf.extend(hash.0); + payload_len += 33; + } + NodeHash::Inline(raw) if raw.1 != 0 => { + buf.push(0x80 + raw.1); + buf.extend_from_slice(&raw.0[..raw.1 as usize]); + payload_len += 1 + raw.1 as usize; + } + _ => { + buf.push(0x80); + payload_len += 1; } - _ => encoder = encoder.encode_bytes(&[]), } } - encoder = encoder.encode_bytes(&self.value); - encoder.finish(); + + // branch's value is empty + buf.push(0x80); + + if payload_len < 56 { + buf[2] = 0xc0 + payload_len as u8; + buf.remove(0); + buf.remove(0); + } else if payload_len < u8::MAX as usize { + buf[1] = 0xf8; + buf[2] = payload_len as u8; + buf.remove(0); + } else { + buf[0] = 0xf9; + buf[1] = ((payload_len as u16) >> 8) as u8; + buf[2] = (payload_len & 0xff) as u8; + } + buf } From 0d87f5b077ad60c53dbdecc0781bbdebac7acc5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Mon, 29 Sep 2025 10:46:03 -0300 Subject: [PATCH 03/50] write directly to hasher --- crates/common/trie/node/branch.rs | 122 +++++++++++++++++++++++++----- 1 file changed, 101 insertions(+), 21 deletions(-) diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 5dedf451fc1..d1b367f6963 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -1,4 +1,9 @@ +use std::io::Write; + +use digest::core_api::CoreWrapper; +use ethereum_types::H256; use ethrex_rlp::structs::Encoder; +use sha3::{Digest, Keccak256, Keccak256Core}; use crate::{TrieDB, ValueRLP, error::TrieError, nibbles::Nibbles, node_hash::NodeHash}; @@ -203,7 +208,71 @@ impl BranchNode { /// Computes the node's hash pub fn compute_hash(&self) -> NodeHash { - NodeHash::from_encoded_raw(&self.encode_raw()) + let mut hasher = Keccak256::new(); + self.encode_write(&mut hasher); + let hash = hasher.finalize(); + NodeHash::Hashed(H256::from_slice(&hash)) + } + + /// Encodes the node + /// Assumptions: + /// - No value + /// - 32 byte choices + pub fn encode_write(&self, buf: &mut impl Write) { + // 16 items * 33 bytes, assuming branches don't have values + // in a state or storage trie. + // plus a 3 byte headroom for the payload prefix + //const MAX_RLP_ENCODED_SIZE: usize = 528 + 3; + + let mut hashes = Vec::with_capacity(self.choices.len()); + + // calculate payload len + let mut payload_len = 1; + for child in self.choices.iter() { + let hash = child.compute_hash(); + match hash { + NodeHash::Hashed(_) => { + payload_len += 33; + } + NodeHash::Inline(raw) if raw.1 != 0 => { + payload_len += 1 + raw.1 as usize; + } + _ => { + payload_len += 1; + } + } + hashes.push(hash); + } + // write payload prefix + if payload_len < 56 { + buf.write(&[0xc0 + payload_len as u8]); + } else if payload_len < u8::MAX as usize { + buf.write(&[0xf8]); + buf.write(&[payload_len as u8]); + } else { + buf.write(&[0xf9]); + buf.write(&[((payload_len as u16) >> 8) as u8]); + buf.write(&[(payload_len & 0xff) as u8]); + } + // push payload + for hash in hashes { + match hash { + NodeHash::Hashed(hash) => { + buf.write(&[0xa0]); + buf.write(&hash.0); + } + NodeHash::Inline(raw) if raw.1 != 0 => { + buf.write(&[0x80 + raw.1]); + buf.write(&raw.0[..raw.1 as usize]); + } + _ => { + buf.write(&[0x80]); + } + } + } + + // branch's value is empty + buf.write(&[0x80]); } /// Encodes the node @@ -213,28 +282,53 @@ impl BranchNode { pub fn encode_raw(&self) -> Vec { // 16 items * 33 bytes, assuming branches don't have values // in a state or storage trie. - // plus a 3 byte headroom for the first prefix and possibly payload len + // plus a 3 byte headroom for the payload prefix const MAX_RLP_ENCODED_SIZE: usize = 528 + 3; let mut buf = Vec::with_capacity(MAX_RLP_ENCODED_SIZE); - buf.extend([0x00; 3]); + let mut hashes = Vec::with_capacity(self.choices.len()); + // calculate payload len let mut payload_len = 1; for child in self.choices.iter() { - match child.compute_hash() { + let hash = child.compute_hash(); + match hash { + NodeHash::Hashed(_) => { + payload_len += 33; + } + NodeHash::Inline(raw) if raw.1 != 0 => { + payload_len += 1 + raw.1 as usize; + } + _ => { + payload_len += 1; + } + } + hashes.push(hash); + } + // push payload prefix + if payload_len < 56 { + buf.push(0xc0 + payload_len as u8); + } else if payload_len < u8::MAX as usize { + buf.push(0xf8); + buf.push(payload_len as u8); + } else { + buf.push(0xf9); + buf.push(((payload_len as u16) >> 8) as u8); + buf.push((payload_len & 0xff) as u8); + } + // push payload + for hash in hashes { + match hash { NodeHash::Hashed(hash) => { buf.push(0xa0); buf.extend(hash.0); - payload_len += 33; } NodeHash::Inline(raw) if raw.1 != 0 => { buf.push(0x80 + raw.1); buf.extend_from_slice(&raw.0[..raw.1 as usize]); - payload_len += 1 + raw.1 as usize; } _ => { buf.push(0x80); - payload_len += 1; } } } @@ -242,20 +336,6 @@ impl BranchNode { // branch's value is empty buf.push(0x80); - if payload_len < 56 { - buf[2] = 0xc0 + payload_len as u8; - buf.remove(0); - buf.remove(0); - } else if payload_len < u8::MAX as usize { - buf[1] = 0xf8; - buf[2] = payload_len as u8; - buf.remove(0); - } else { - buf[0] = 0xf9; - buf[1] = ((payload_len as u16) >> 8) as u8; - buf[2] = (payload_len & 0xff) as u8; - } - buf } From 2475090b6e1be1b6348dbc280bb3de580d93498f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Mon, 29 Sep 2025 11:16:08 -0300 Subject: [PATCH 04/50] dont use encode_write --- crates/common/trie/node/branch.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index d1b367f6963..9b12ab194a7 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -208,10 +208,7 @@ impl BranchNode { /// Computes the node's hash pub fn compute_hash(&self) -> NodeHash { - let mut hasher = Keccak256::new(); - self.encode_write(&mut hasher); - let hash = hasher.finalize(); - NodeHash::Hashed(H256::from_slice(&hash)) + NodeHash::from_encoded_raw(&self.encode_raw()) } /// Encodes the node From 973116554ce2cf1a07a07e51225dd15c0e40ad25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Mon, 29 Sep 2025 12:35:03 -0300 Subject: [PATCH 05/50] precalc len, assume no inline --- crates/common/trie/node/branch.rs | 47 +++++++++++-------------------- 1 file changed, 16 insertions(+), 31 deletions(-) diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 9b12ab194a7..36802ebf4d7 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -279,50 +279,35 @@ impl BranchNode { pub fn encode_raw(&self) -> Vec { // 16 items * 33 bytes, assuming branches don't have values // in a state or storage trie. - // plus a 3 byte headroom for the payload prefix + // plus a 3 byte headroom for the first prefix and payload len const MAX_RLP_ENCODED_SIZE: usize = 528 + 3; - let mut buf = Vec::with_capacity(MAX_RLP_ENCODED_SIZE); - let mut hashes = Vec::with_capacity(self.choices.len()); - // calculate payload len - let mut payload_len = 1; - for child in self.choices.iter() { - let hash = child.compute_hash(); - match hash { - NodeHash::Hashed(_) => { - payload_len += 33; - } - NodeHash::Inline(raw) if raw.1 != 0 => { - payload_len += 1 + raw.1 as usize; - } - _ => { - payload_len += 1; - } - } - hashes.push(hash); - } - // push payload prefix + let payload_len = self.choices.iter().fold(1, |payload_len, child| { + payload_len + if child.is_valid() { 33 } else { 1 } + }); if payload_len < 56 { buf.push(0xc0 + payload_len as u8); } else if payload_len < u8::MAX as usize { - buf.push(0xf8); - buf.push(payload_len as u8); + buf.extend([0xf8, payload_len as u8]); } else { - buf.push(0xf9); - buf.push(((payload_len as u16) >> 8) as u8); - buf.push((payload_len & 0xff) as u8); + buf.extend([ + 0xf9, + ((payload_len as u16) >> 8) as u8, + (payload_len & 0xff) as u8, + ]); } - // push payload - for hash in hashes { - match hash { + + for child in self.choices.iter() { + match child.compute_hash() { NodeHash::Hashed(hash) => { buf.push(0xa0); buf.extend(hash.0); } NodeHash::Inline(raw) if raw.1 != 0 => { - buf.push(0x80 + raw.1); - buf.extend_from_slice(&raw.0[..raw.1 as usize]); + unreachable!(); + // buf.push(0x80 + raw.1); + // buf.extend_from_slice(&raw.0[..raw.1 as usize]); } _ => { buf.push(0x80); From bf46e33c2ecffec171daefd21b2fea9b649c15a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Mon, 29 Sep 2025 12:53:44 -0300 Subject: [PATCH 06/50] precalc len and write to hasher --- crates/common/trie/node/branch.rs | 55 +++++++++++-------------------- 1 file changed, 19 insertions(+), 36 deletions(-) diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 36802ebf4d7..abbb345e4fa 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -208,7 +208,10 @@ impl BranchNode { /// Computes the node's hash pub fn compute_hash(&self) -> NodeHash { - NodeHash::from_encoded_raw(&self.encode_raw()) + let mut hasher = Keccak256::new(); + self.encode_write(&mut hasher); + let hash = hasher.finalize(); + NodeHash::Hashed(H256::from_slice(&hash)) } /// Encodes the node @@ -216,51 +219,31 @@ impl BranchNode { /// - No value /// - 32 byte choices pub fn encode_write(&self, buf: &mut impl Write) { - // 16 items * 33 bytes, assuming branches don't have values - // in a state or storage trie. - // plus a 3 byte headroom for the payload prefix - //const MAX_RLP_ENCODED_SIZE: usize = 528 + 3; - - let mut hashes = Vec::with_capacity(self.choices.len()); - - // calculate payload len - let mut payload_len = 1; - for child in self.choices.iter() { - let hash = child.compute_hash(); - match hash { - NodeHash::Hashed(_) => { - payload_len += 33; - } - NodeHash::Inline(raw) if raw.1 != 0 => { - payload_len += 1 + raw.1 as usize; - } - _ => { - payload_len += 1; - } - } - hashes.push(hash); - } - // write payload prefix + let payload_len = self.choices.iter().fold(1, |payload_len, child| { + payload_len + if child.is_valid() { 33 } else { 1 } + }); if payload_len < 56 { buf.write(&[0xc0 + payload_len as u8]); } else if payload_len < u8::MAX as usize { - buf.write(&[0xf8]); - buf.write(&[payload_len as u8]); + buf.write(&[0xf8, payload_len as u8]); } else { - buf.write(&[0xf9]); - buf.write(&[((payload_len as u16) >> 8) as u8]); - buf.write(&[(payload_len & 0xff) as u8]); + buf.write(&[ + 0xf9, + ((payload_len as u16) >> 8) as u8, + (payload_len & 0xff) as u8, + ]); } - // push payload - for hash in hashes { - match hash { + + for child in self.choices.iter() { + match child.compute_hash() { NodeHash::Hashed(hash) => { buf.write(&[0xa0]); buf.write(&hash.0); } NodeHash::Inline(raw) if raw.1 != 0 => { - buf.write(&[0x80 + raw.1]); - buf.write(&raw.0[..raw.1 as usize]); + unreachable!(); + // buf.push(0x80 + raw.1); + // buf.extend_from_slice(&raw.0[..raw.1 as usize]); } _ => { buf.write(&[0x80]); From 9433e3c703d24a67c31a38e844fd8749d002e702 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Mon, 29 Sep 2025 16:12:19 -0300 Subject: [PATCH 07/50] quick rlp and hashing for the rest of nodes --- crates/common/trie/node/branch.rs | 5 +++ crates/common/trie/node/extension.rs | 42 ++++++++++++++++++- crates/common/trie/node/leaf.rs | 62 +++++++++++++++++++++++++++- 3 files changed, 107 insertions(+), 2 deletions(-) diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index abbb345e4fa..526c02dc330 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -227,6 +227,7 @@ impl BranchNode { } else if payload_len < u8::MAX as usize { buf.write(&[0xf8, payload_len as u8]); } else { + // ASSUMPTION: list len will never be >u16::MAX (2 bytes len) buf.write(&[ 0xf9, ((payload_len as u16) >> 8) as u8, @@ -241,6 +242,10 @@ impl BranchNode { buf.write(&hash.0); } NodeHash::Inline(raw) if raw.1 != 0 => { + // ASSUMPTION: nodes would never be inlined + // WARN: assumption is wrong actually, but the probability of an inlined node + // is pretty small. + // TODO: fix this unreachable!(); // buf.push(0x80 + raw.1); // buf.extend_from_slice(&raw.0[..raw.1 as usize]); diff --git a/crates/common/trie/node/extension.rs b/crates/common/trie/node/extension.rs index 74abfa6a921..24cd1df4903 100644 --- a/crates/common/trie/node/extension.rs +++ b/crates/common/trie/node/extension.rs @@ -1,4 +1,8 @@ +use std::io::Write; + +use ethereum_types::H256; use ethrex_rlp::structs::Encoder; +use sha3::{Digest, Keccak256}; use crate::ValueRLP; use crate::nibbles::Nibbles; @@ -144,7 +148,43 @@ impl ExtensionNode { /// Computes the node's hash pub fn compute_hash(&self) -> NodeHash { - NodeHash::from_encoded_raw(&self.encode_raw()) + let mut hasher = Keccak256::new(); + self.encode_write(&mut hasher); + let hash = hasher.finalize(); + NodeHash::Hashed(H256::from_slice(&hash)) + } + + /// Encodes the node + pub fn encode_write(&self, buf: &mut impl Write) { + let prefix_encoded = self.prefix.encode_compact(); + + // calc total payload len + // ASSUMPTION: there are no inline node refs, so child len is 1 + 32 bytes + // ASSUMPTION: prefix is never greater than 55 bytes (in particular it's at most 32 bytes) + let payload_len = 1 + prefix_encoded.len() + 33; + + // write payload prefix + if payload_len < 56 { + buf.write(&[0xc0 + payload_len as u8]); + } else if payload_len < u8::MAX as usize { + buf.write(&[0xf8, payload_len as u8]); + } else { + // ASSUMPTION: list len will never be greater than (1 + 32) * 2 = 66 bytes + unreachable!(); + } + + // write prefix + // ASSUMPTION: prefix is never greater than 55 bytes (in particular it's at most 32 bytes) + buf.write(&[0x80 + prefix_encoded.len() as u8]); + buf.write(&prefix_encoded); + + // write child hash + let NodeHash::Hashed(child_hash) = self.child.compute_hash() else { + // ASSUMPTION: there are no inline node refs + unreachable!(); + }; + buf.write(&[0x20]); + buf.write(&child_hash.0); } /// Encodes the node diff --git a/crates/common/trie/node/leaf.rs b/crates/common/trie/node/leaf.rs index 8629fc90f70..5ca41a94ddb 100644 --- a/crates/common/trie/node/leaf.rs +++ b/crates/common/trie/node/leaf.rs @@ -1,4 +1,8 @@ +use std::io::Write; + +use ethereum_types::H256; use ethrex_rlp::structs::Encoder; +use sha3::{Digest, Keccak256}; use crate::{ValueRLP, error::TrieError, nibbles::Nibbles, node::BranchNode, node_hash::NodeHash}; @@ -114,7 +118,63 @@ impl LeafNode { /// Computes the node's hash pub fn compute_hash(&self) -> NodeHash { - NodeHash::from_encoded_raw(&self.encode_raw()) + let mut hasher = Keccak256::new(); + self.encode_write(&mut hasher); + let hash = hasher.finalize(); + NodeHash::Hashed(H256::from_slice(&hash)) + } + + /// Encodes the node + pub fn encode_write(&self, buf: &mut impl Write) { + let partial_encoded = self.partial.encode_compact(); + + // calc total payload len + let payload_len = { + // ASSUMPTION: partial is never greater than 55 bytes (in particular it's at most 32 bytes) + let partial_len = 1 + partial_encoded.len(); + let value_prefix_len = match self.value.len() { + ..56 => 1, + 56..255 => 2, + _ => 3, // ASSUMPTION: list len will never be >u16::MAX (2 bytes len) + }; + partial_len + value_prefix_len + self.value.len() + }; + // write payload prefix + if payload_len < 56 { + buf.write(&[0xc0 + payload_len as u8]); + } else if payload_len < u8::MAX as usize { + buf.write(&[0xf8, payload_len as u8]); + } else { + // ASSUMPTION: list len will never be >u16::MAX (2 bytes len) + buf.write(&[ + 0xf9, + ((payload_len as u16) >> 8) as u8, + (payload_len & 0xff) as u8, + ]); + } + + // write partial + buf.write(&[0x80 + partial_encoded.len() as u8]); + buf.write(&partial_encoded); + + // write value prefix + if self.value.len() == 1 && self.value[0] < 0x80 { + // value is its own encoding + } else if self.value.len() < 56 { + buf.write(&[0x80 + payload_len as u8]); + } else if self.value.len() < u8::MAX as usize { + buf.write(&[0xb8, payload_len as u8]); + } else { + // ASSUMPTION: value len will never be >u16::MAX (2 bytes len) + buf.write(&[ + 0xb9, + ((payload_len as u16) >> 8) as u8, + (payload_len & 0xff) as u8, + ]); + } + + // write value + buf.write(&self.value); } /// Encodes the node From 7132ea091e262217ddf0df68e4b650bdf8621534 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Mon, 29 Sep 2025 16:45:23 -0300 Subject: [PATCH 08/50] fix partial/prefix encoding --- crates/common/trie/node/extension.rs | 18 +++++++++++++++--- crates/common/trie/node/leaf.rs | 14 ++++++++++++-- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/crates/common/trie/node/extension.rs b/crates/common/trie/node/extension.rs index 24cd1df4903..5be19e9f886 100644 --- a/crates/common/trie/node/extension.rs +++ b/crates/common/trie/node/extension.rs @@ -161,7 +161,14 @@ impl ExtensionNode { // calc total payload len // ASSUMPTION: there are no inline node refs, so child len is 1 + 32 bytes // ASSUMPTION: prefix is never greater than 55 bytes (in particular it's at most 32 bytes) - let payload_len = 1 + prefix_encoded.len() + 33; + let payload_len = { + let prefix_len = if prefix_encoded.len() == 1 { + 1 + } else { + 1 + prefix_encoded.len() + }; + prefix_len + 33 + }; // write payload prefix if payload_len < 56 { @@ -173,9 +180,14 @@ impl ExtensionNode { unreachable!(); } + // write prefix prefix + if prefix_encoded.len() == 1 { + // value is its own encoding + } else { + // ASSUMPTION: prefix is never greater than 55 bytes (in particular it's at most 32 bytes) + buf.write(&[0x80 + prefix_encoded.len() as u8]); + } // write prefix - // ASSUMPTION: prefix is never greater than 55 bytes (in particular it's at most 32 bytes) - buf.write(&[0x80 + prefix_encoded.len() as u8]); buf.write(&prefix_encoded); // write child hash diff --git a/crates/common/trie/node/leaf.rs b/crates/common/trie/node/leaf.rs index 5ca41a94ddb..439ba881984 100644 --- a/crates/common/trie/node/leaf.rs +++ b/crates/common/trie/node/leaf.rs @@ -131,7 +131,11 @@ impl LeafNode { // calc total payload len let payload_len = { // ASSUMPTION: partial is never greater than 55 bytes (in particular it's at most 32 bytes) - let partial_len = 1 + partial_encoded.len(); + let partial_len = if partial_encoded.len() == 1 { + 1 + } else { + 1 + partial_encoded.len() + }; let value_prefix_len = match self.value.len() { ..56 => 1, 56..255 => 2, @@ -153,8 +157,14 @@ impl LeafNode { ]); } + // write partial prefix + if partial_encoded.len() == 1 { + // value is its own encoding + } else { + // ASSUMPTION: partial is never greater than 55 bytes (in particular it's at most 32 bytes) + buf.write(&[0x80 + partial_encoded.len() as u8]); + } // write partial - buf.write(&[0x80 + partial_encoded.len() as u8]); buf.write(&partial_encoded); // write value prefix From 71519ea04ea751fd3173135bfd8788bc272e286e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Mon, 29 Sep 2025 17:04:25 -0300 Subject: [PATCH 09/50] fix partial/prefix encoding for case len 1 --- crates/common/trie/node/extension.rs | 2 +- crates/common/trie/node/leaf.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/common/trie/node/extension.rs b/crates/common/trie/node/extension.rs index 5be19e9f886..1c360f13381 100644 --- a/crates/common/trie/node/extension.rs +++ b/crates/common/trie/node/extension.rs @@ -181,7 +181,7 @@ impl ExtensionNode { } // write prefix prefix - if prefix_encoded.len() == 1 { + if prefix_encoded.len() == 1 && prefix_encoded[0] < 0x80 { // value is its own encoding } else { // ASSUMPTION: prefix is never greater than 55 bytes (in particular it's at most 32 bytes) diff --git a/crates/common/trie/node/leaf.rs b/crates/common/trie/node/leaf.rs index 439ba881984..41873e7a481 100644 --- a/crates/common/trie/node/leaf.rs +++ b/crates/common/trie/node/leaf.rs @@ -158,7 +158,7 @@ impl LeafNode { } // write partial prefix - if partial_encoded.len() == 1 { + if partial_encoded.len() == 1 && partial_encoded[0] < 0x80 { // value is its own encoding } else { // ASSUMPTION: partial is never greater than 55 bytes (in particular it's at most 32 bytes) @@ -168,6 +168,7 @@ impl LeafNode { buf.write(&partial_encoded); // write value prefix + // TODO: not possible to be 1 byte if self.value.len() == 1 && self.value[0] < 0x80 { // value is its own encoding } else if self.value.len() < 56 { From 04faaddf84a62ef5521a3c909fa48f88af51abd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Mon, 29 Sep 2025 17:13:54 -0300 Subject: [PATCH 10/50] use correct len on value encoding --- crates/common/trie/node/leaf.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/common/trie/node/leaf.rs b/crates/common/trie/node/leaf.rs index 41873e7a481..65f779fba02 100644 --- a/crates/common/trie/node/leaf.rs +++ b/crates/common/trie/node/leaf.rs @@ -172,15 +172,15 @@ impl LeafNode { if self.value.len() == 1 && self.value[0] < 0x80 { // value is its own encoding } else if self.value.len() < 56 { - buf.write(&[0x80 + payload_len as u8]); + buf.write(&[0x80 + self.value.len() as u8]); } else if self.value.len() < u8::MAX as usize { - buf.write(&[0xb8, payload_len as u8]); + buf.write(&[0xb8, self.value.len() as u8]); } else { // ASSUMPTION: value len will never be >u16::MAX (2 bytes len) buf.write(&[ 0xb9, - ((payload_len as u16) >> 8) as u8, - (payload_len & 0xff) as u8, + ((self.value.len() as u16) >> 8) as u8, + (self.value.len() & 0xff) as u8, ]); } From f7195f26e684d82380fcf97e5c4b8f3e2db3d017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Mon, 29 Sep 2025 17:14:53 -0300 Subject: [PATCH 11/50] fix extension hash prefix --- crates/common/trie/node/extension.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/common/trie/node/extension.rs b/crates/common/trie/node/extension.rs index 1c360f13381..ce94662f0c8 100644 --- a/crates/common/trie/node/extension.rs +++ b/crates/common/trie/node/extension.rs @@ -195,7 +195,7 @@ impl ExtensionNode { // ASSUMPTION: there are no inline node refs unreachable!(); }; - buf.write(&[0x20]); + buf.write(&[0xa0]); buf.write(&child_hash.0); } From 3cbe8739df862812df8f3b445bce1ebb230da274 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Mon, 29 Sep 2025 17:15:44 -0300 Subject: [PATCH 12/50] fix conditional on < 0x80 --- crates/common/trie/node/extension.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/common/trie/node/extension.rs b/crates/common/trie/node/extension.rs index ce94662f0c8..49e2c302e2f 100644 --- a/crates/common/trie/node/extension.rs +++ b/crates/common/trie/node/extension.rs @@ -162,7 +162,7 @@ impl ExtensionNode { // ASSUMPTION: there are no inline node refs, so child len is 1 + 32 bytes // ASSUMPTION: prefix is never greater than 55 bytes (in particular it's at most 32 bytes) let payload_len = { - let prefix_len = if prefix_encoded.len() == 1 { + let prefix_len = if prefix_encoded.len() == 1 && prefix_encoded[0] < 0x80 { 1 } else { 1 + prefix_encoded.len() From b99f69702e7d0a00b547e80c98fcd32034b15aef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Mon, 29 Sep 2025 17:17:27 -0300 Subject: [PATCH 13/50] fix value prefix match case --- crates/common/trie/node/leaf.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/common/trie/node/leaf.rs b/crates/common/trie/node/leaf.rs index 65f779fba02..5bb894d84b6 100644 --- a/crates/common/trie/node/leaf.rs +++ b/crates/common/trie/node/leaf.rs @@ -131,14 +131,14 @@ impl LeafNode { // calc total payload len let payload_len = { // ASSUMPTION: partial is never greater than 55 bytes (in particular it's at most 32 bytes) - let partial_len = if partial_encoded.len() == 1 { + let partial_len = if partial_encoded.len() == 1 && partial_encoded[0] < 0x80 { 1 } else { 1 + partial_encoded.len() }; let value_prefix_len = match self.value.len() { ..56 => 1, - 56..255 => 2, + 56..256 => 2, _ => 3, // ASSUMPTION: list len will never be >u16::MAX (2 bytes len) }; partial_len + value_prefix_len + self.value.len() From 0c3571ce1212dddc112e5431cba25c25dbfb313f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Mon, 29 Sep 2025 17:40:43 -0300 Subject: [PATCH 14/50] add todo and unreachable --- crates/common/trie/node/branch.rs | 2 +- crates/common/trie/node/leaf.rs | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 526c02dc330..c4b70be8374 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -224,7 +224,7 @@ impl BranchNode { }); if payload_len < 56 { buf.write(&[0xc0 + payload_len as u8]); - } else if payload_len < u8::MAX as usize { + } else if payload_len < u8::MAX as usize { // TODO: < or <= buf.write(&[0xf8, payload_len as u8]); } else { // ASSUMPTION: list len will never be >u16::MAX (2 bytes len) diff --git a/crates/common/trie/node/leaf.rs b/crates/common/trie/node/leaf.rs index 5bb894d84b6..4694705cd7d 100644 --- a/crates/common/trie/node/leaf.rs +++ b/crates/common/trie/node/leaf.rs @@ -168,12 +168,11 @@ impl LeafNode { buf.write(&partial_encoded); // write value prefix - // TODO: not possible to be 1 byte if self.value.len() == 1 && self.value[0] < 0x80 { - // value is its own encoding + unreachable!() } else if self.value.len() < 56 { buf.write(&[0x80 + self.value.len() as u8]); - } else if self.value.len() < u8::MAX as usize { + } else if self.value.len() < u8::MAX as usize { // TODO: < or <= buf.write(&[0xb8, self.value.len() as u8]); } else { // ASSUMPTION: value len will never be >u16::MAX (2 bytes len) From 451b340b6752bd63d2787017ee495a72a8f6f51a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Mon, 29 Sep 2025 17:50:45 -0300 Subject: [PATCH 15/50] bad assumption of leaf with single byte value --- crates/common/trie/node/leaf.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/common/trie/node/leaf.rs b/crates/common/trie/node/leaf.rs index 4694705cd7d..faa3531423f 100644 --- a/crates/common/trie/node/leaf.rs +++ b/crates/common/trie/node/leaf.rs @@ -137,7 +137,8 @@ impl LeafNode { 1 + partial_encoded.len() }; let value_prefix_len = match self.value.len() { - ..56 => 1, + 1 if self.value[0] < 0x80 => 0, + 1 | ..56 => 1, 56..256 => 2, _ => 3, // ASSUMPTION: list len will never be >u16::MAX (2 bytes len) }; @@ -169,7 +170,7 @@ impl LeafNode { // write value prefix if self.value.len() == 1 && self.value[0] < 0x80 { - unreachable!() + // value is its own encoding } else if self.value.len() < 56 { buf.write(&[0x80 + self.value.len() as u8]); } else if self.value.len() < u8::MAX as usize { // TODO: < or <= From 2f6f9812b53eb38537542f7714ae83ee0112bc52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Tue, 30 Sep 2025 13:28:17 -0300 Subject: [PATCH 16/50] memoize hash --- crates/common/trie/node.rs | 29 ++++++++++++++++++++++++++++- crates/common/trie/node/leaf.rs | 3 ++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/crates/common/trie/node.rs b/crates/common/trie/node.rs index 8818c54f8a0..337305fe2a4 100644 --- a/crates/common/trie/node.rs +++ b/crates/common/trie/node.rs @@ -80,7 +80,10 @@ impl NodeRef { pub fn compute_hash(&self) -> NodeHash { match self { - NodeRef::Node(node, hash) => *hash.get_or_init(|| node.compute_hash()), + NodeRef::Node(node, hash) => *hash.get_or_init(|| { + node.memoize_hash(); + node.compute_hash() + }), NodeRef::Hash(hash) => *hash, } } @@ -285,6 +288,30 @@ impl Node { Node::Leaf(n) => n.compute_hash(), } } + + pub fn memoize_hash(&self) { + match self { + Node::Branch(n) => { + for child in &n.choices { + match child { + NodeRef::Node(node, hash) if hash.get().is_none() => { + node.memoize_hash(); + let _ = hash.set(node.compute_hash()); + } + _ => {} + } + } + } + Node::Extension(n) => match &n.child { + NodeRef::Node(node, hash) if hash.get().is_none() => { + node.memoize_hash(); + let _ = hash.set(node.compute_hash()); + } + _ => {} + }, + _ => {} + } + } } fn decode_child(rlp: &[u8]) -> NodeHash { diff --git a/crates/common/trie/node/leaf.rs b/crates/common/trie/node/leaf.rs index faa3531423f..f110a7da5c3 100644 --- a/crates/common/trie/node/leaf.rs +++ b/crates/common/trie/node/leaf.rs @@ -173,7 +173,8 @@ impl LeafNode { // value is its own encoding } else if self.value.len() < 56 { buf.write(&[0x80 + self.value.len() as u8]); - } else if self.value.len() < u8::MAX as usize { // TODO: < or <= + } else if self.value.len() < u8::MAX as usize { + // TODO: < or <= buf.write(&[0xb8, self.value.len() as u8]); } else { // ASSUMPTION: value len will never be >u16::MAX (2 bytes len) From dfa1d2782d7cc154ec0a9b8c729306fce6ce6337 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Tue, 30 Sep 2025 15:08:06 -0300 Subject: [PATCH 17/50] use encode_raw for branch --- crates/common/trie/node/branch.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index c4b70be8374..5f6ca7c2def 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -209,7 +209,7 @@ impl BranchNode { /// Computes the node's hash pub fn compute_hash(&self) -> NodeHash { let mut hasher = Keccak256::new(); - self.encode_write(&mut hasher); + hasher.update(&self.encode_raw()); let hash = hasher.finalize(); NodeHash::Hashed(H256::from_slice(&hash)) } From d8e77d9248e9be3576b867671015521438abb300 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Tue, 30 Sep 2025 15:33:59 -0300 Subject: [PATCH 18/50] remove write, add arrayvec buf --- Cargo.lock | 1 + crates/common/trie/Cargo.toml | 1 + crates/common/trie/node/branch.rs | 52 +++---------------------------- 3 files changed, 6 insertions(+), 48 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ac1094e5b2d..50a1547825b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4571,6 +4571,7 @@ name = "ethrex-trie" version = "0.1.0" dependencies = [ "anyhow", + "arrayvec", "bytes", "cita_trie", "criterion", diff --git a/crates/common/trie/Cargo.toml b/crates/common/trie/Cargo.toml index e5badcc42bc..b8c2587c932 100644 --- a/crates/common/trie/Cargo.toml +++ b/crates/common/trie/Cargo.toml @@ -22,6 +22,7 @@ rocksdb = { workspace = true, optional = true } smallvec = { version = "1.10.0", features = ["const_generics", "union"] } digest = "0.10.6" lazy_static.workspace = true +arrayvec = "0.7.6" [features] default = [] diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 5f6ca7c2def..9a9c857b571 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -1,5 +1,6 @@ use std::io::Write; +use arrayvec::ArrayVec; use digest::core_api::CoreWrapper; use ethereum_types::H256; use ethrex_rlp::structs::Encoder; @@ -214,52 +215,6 @@ impl BranchNode { NodeHash::Hashed(H256::from_slice(&hash)) } - /// Encodes the node - /// Assumptions: - /// - No value - /// - 32 byte choices - pub fn encode_write(&self, buf: &mut impl Write) { - let payload_len = self.choices.iter().fold(1, |payload_len, child| { - payload_len + if child.is_valid() { 33 } else { 1 } - }); - if payload_len < 56 { - buf.write(&[0xc0 + payload_len as u8]); - } else if payload_len < u8::MAX as usize { // TODO: < or <= - buf.write(&[0xf8, payload_len as u8]); - } else { - // ASSUMPTION: list len will never be >u16::MAX (2 bytes len) - buf.write(&[ - 0xf9, - ((payload_len as u16) >> 8) as u8, - (payload_len & 0xff) as u8, - ]); - } - - for child in self.choices.iter() { - match child.compute_hash() { - NodeHash::Hashed(hash) => { - buf.write(&[0xa0]); - buf.write(&hash.0); - } - NodeHash::Inline(raw) if raw.1 != 0 => { - // ASSUMPTION: nodes would never be inlined - // WARN: assumption is wrong actually, but the probability of an inlined node - // is pretty small. - // TODO: fix this - unreachable!(); - // buf.push(0x80 + raw.1); - // buf.extend_from_slice(&raw.0[..raw.1 as usize]); - } - _ => { - buf.write(&[0x80]); - } - } - } - - // branch's value is empty - buf.write(&[0x80]); - } - /// Encodes the node /// Assumptions: /// - No value @@ -268,8 +223,9 @@ impl BranchNode { // 16 items * 33 bytes, assuming branches don't have values // in a state or storage trie. // plus a 3 byte headroom for the first prefix and payload len - const MAX_RLP_ENCODED_SIZE: usize = 528 + 3; - let mut buf = Vec::with_capacity(MAX_RLP_ENCODED_SIZE); + // plus a byte for the empty value + const MAX_RLP_ENCODED_SIZE: usize = 16*33 + 3 + 1; + let mut buf: ArrayVec = ArrayVec::new(); let payload_len = self.choices.iter().fold(1, |payload_len, child| { payload_len + if child.is_valid() { 33 } else { 1 } From 7d9aaac94b740e52cc83da5e2261cd60097828bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Tue, 30 Sep 2025 15:37:21 -0300 Subject: [PATCH 19/50] lint --- crates/common/trie/node.rs | 2 +- crates/common/trie/node/branch.rs | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/crates/common/trie/node.rs b/crates/common/trie/node.rs index 337305fe2a4..62e696383b1 100644 --- a/crates/common/trie/node.rs +++ b/crates/common/trie/node.rs @@ -219,7 +219,7 @@ impl Node { /// Encodes the node pub fn encode_raw(&self) -> Vec { match self { - Node::Branch(n) => n.encode_raw(), + Node::Branch(n) => n.encode_raw().to_vec(), Node::Extension(n) => n.encode_raw(), Node::Leaf(n) => n.encode_raw(), } diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 9a9c857b571..4b6f1cb5777 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -10,6 +10,8 @@ use crate::{TrieDB, ValueRLP, error::TrieError, nibbles::Nibbles, node_hash::Nod use super::{ExtensionNode, LeafNode, Node, NodeRef, ValueOrHash}; +pub const BRANCH_MAX_RLP_ENCODED_SIZE: usize = 16*33 + 3 + 1; + /// Branch Node of an an Ethereum Compatible Patricia Merkle Trie /// Contains the node's value and the hash of its children nodes #[derive(Debug, Clone, PartialEq, Default)] @@ -219,13 +221,12 @@ impl BranchNode { /// Assumptions: /// - No value /// - 32 byte choices - pub fn encode_raw(&self) -> Vec { + pub fn encode_raw(&self) -> ArrayVec { // 16 items * 33 bytes, assuming branches don't have values // in a state or storage trie. // plus a 3 byte headroom for the first prefix and payload len // plus a byte for the empty value - const MAX_RLP_ENCODED_SIZE: usize = 16*33 + 3 + 1; - let mut buf: ArrayVec = ArrayVec::new(); + let mut buf: ArrayVec = ArrayVec::new(); let payload_len = self.choices.iter().fold(1, |payload_len, child| { payload_len + if child.is_valid() { 33 } else { 1 } @@ -275,7 +276,7 @@ impl BranchNode { node_path: &mut Vec>, ) -> Result<(), TrieError> { // Add self to node_path (if not inlined in parent) - let encoded = self.encode_raw(); + let encoded = self.encode_raw().to_vec(); if encoded.len() >= 32 { node_path.push(encoded); }; From b6cc6f69682286b91bf99f45c746c36d9cb08622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Tue, 30 Sep 2025 15:51:54 -0300 Subject: [PATCH 20/50] remove arrayvec --- Cargo.lock | 1 - crates/common/trie/Cargo.toml | 1 - crates/common/trie/node.rs | 2 +- crates/common/trie/node/branch.rs | 9 ++++----- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 50a1547825b..ac1094e5b2d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4571,7 +4571,6 @@ name = "ethrex-trie" version = "0.1.0" dependencies = [ "anyhow", - "arrayvec", "bytes", "cita_trie", "criterion", diff --git a/crates/common/trie/Cargo.toml b/crates/common/trie/Cargo.toml index b8c2587c932..e5badcc42bc 100644 --- a/crates/common/trie/Cargo.toml +++ b/crates/common/trie/Cargo.toml @@ -22,7 +22,6 @@ rocksdb = { workspace = true, optional = true } smallvec = { version = "1.10.0", features = ["const_generics", "union"] } digest = "0.10.6" lazy_static.workspace = true -arrayvec = "0.7.6" [features] default = [] diff --git a/crates/common/trie/node.rs b/crates/common/trie/node.rs index 62e696383b1..337305fe2a4 100644 --- a/crates/common/trie/node.rs +++ b/crates/common/trie/node.rs @@ -219,7 +219,7 @@ impl Node { /// Encodes the node pub fn encode_raw(&self) -> Vec { match self { - Node::Branch(n) => n.encode_raw().to_vec(), + Node::Branch(n) => n.encode_raw(), Node::Extension(n) => n.encode_raw(), Node::Leaf(n) => n.encode_raw(), } diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 4b6f1cb5777..852d4daa919 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -10,8 +10,6 @@ use crate::{TrieDB, ValueRLP, error::TrieError, nibbles::Nibbles, node_hash::Nod use super::{ExtensionNode, LeafNode, Node, NodeRef, ValueOrHash}; -pub const BRANCH_MAX_RLP_ENCODED_SIZE: usize = 16*33 + 3 + 1; - /// Branch Node of an an Ethereum Compatible Patricia Merkle Trie /// Contains the node's value and the hash of its children nodes #[derive(Debug, Clone, PartialEq, Default)] @@ -221,12 +219,13 @@ impl BranchNode { /// Assumptions: /// - No value /// - 32 byte choices - pub fn encode_raw(&self) -> ArrayVec { + pub fn encode_raw(&self) -> Vec { // 16 items * 33 bytes, assuming branches don't have values // in a state or storage trie. // plus a 3 byte headroom for the first prefix and payload len // plus a byte for the empty value - let mut buf: ArrayVec = ArrayVec::new(); + const MAX_RLP_SIZE: usize = 16*33 + 3 + 1; + let mut buf: Vec = Vec::with_capacity(MAX_RLP_SIZE); let payload_len = self.choices.iter().fold(1, |payload_len, child| { payload_len + if child.is_valid() { 33 } else { 1 } @@ -276,7 +275,7 @@ impl BranchNode { node_path: &mut Vec>, ) -> Result<(), TrieError> { // Add self to node_path (if not inlined in parent) - let encoded = self.encode_raw().to_vec(); + let encoded = self.encode_raw(); if encoded.len() >= 32 { node_path.push(encoded); }; From 0641b1f1a683373e41d06d2c5faf01d8cba24451 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Tue, 30 Sep 2025 15:52:23 -0300 Subject: [PATCH 21/50] remove arrayvec import --- crates/common/trie/node/branch.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 852d4daa919..f9cf6e0dde0 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -1,6 +1,5 @@ use std::io::Write; -use arrayvec::ArrayVec; use digest::core_api::CoreWrapper; use ethereum_types::H256; use ethrex_rlp::structs::Encoder; From fa6cd129e3e0b5047b46f02e59a33ed042458c11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Tue, 30 Sep 2025 16:48:04 -0300 Subject: [PATCH 22/50] add comment to memoize_hashes and rename --- crates/common/trie/node.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/crates/common/trie/node.rs b/crates/common/trie/node.rs index 337305fe2a4..1e379914050 100644 --- a/crates/common/trie/node.rs +++ b/crates/common/trie/node.rs @@ -81,7 +81,7 @@ impl NodeRef { pub fn compute_hash(&self) -> NodeHash { match self { NodeRef::Node(node, hash) => *hash.get_or_init(|| { - node.memoize_hash(); + node.memoize_hashes(); node.compute_hash() }), NodeRef::Hash(hash) => *hash, @@ -289,13 +289,15 @@ impl Node { } } - pub fn memoize_hash(&self) { + /// Recursively memoizes the hashes of all nodes that are in the trie + /// that has `self` as root + pub fn memoize_hashes(&self) { match self { Node::Branch(n) => { for child in &n.choices { match child { NodeRef::Node(node, hash) if hash.get().is_none() => { - node.memoize_hash(); + node.memoize_hashes(); let _ = hash.set(node.compute_hash()); } _ => {} @@ -304,7 +306,7 @@ impl Node { } Node::Extension(n) => match &n.child { NodeRef::Node(node, hash) if hash.get().is_none() => { - node.memoize_hash(); + node.memoize_hashes(); let _ = hash.set(node.compute_hash()); } _ => {} From 24b68047f0be9ce29b4c6eea2aa11d6ff9deddf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Tue, 30 Sep 2025 16:59:45 -0300 Subject: [PATCH 23/50] simplify memoize --- crates/common/trie/node.rs | 37 +++++++++++++------------------ crates/common/trie/node/branch.rs | 2 +- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/crates/common/trie/node.rs b/crates/common/trie/node.rs index 1e379914050..33face0b327 100644 --- a/crates/common/trie/node.rs +++ b/crates/common/trie/node.rs @@ -17,7 +17,7 @@ use ethrex_rlp::{ pub use extension::ExtensionNode; pub use leaf::LeafNode; -use crate::{TrieDB, error::TrieError, nibbles::Nibbles}; +use crate::{TrieDB, error::TrieError, nibbles::Nibbles, node}; use super::{ValueRLP, node_hash::NodeHash}; @@ -80,13 +80,19 @@ impl NodeRef { pub fn compute_hash(&self) -> NodeHash { match self { - NodeRef::Node(node, hash) => *hash.get_or_init(|| { - node.memoize_hashes(); - node.compute_hash() - }), + NodeRef::Node(node, hash) => *hash.get_or_init(|| node.compute_hash()), NodeRef::Hash(hash) => *hash, } } + + pub fn memoize_hashes(&self) { + if let NodeRef::Node(node, hash) = &self { + if hash.get().is_none() { + node.memoize_hashes(); + let _ = hash.set(node.compute_hash()); + } + } + } } impl Default for NodeRef { @@ -282,6 +288,7 @@ impl Node { /// Computes the node's hash pub fn compute_hash(&self) -> NodeHash { + self.memoize_hashes(); match self { Node::Branch(n) => n.compute_hash(), Node::Extension(n) => n.compute_hash(), @@ -289,28 +296,16 @@ impl Node { } } - /// Recursively memoizes the hashes of all nodes that are in the trie - /// that has `self` as root + /// Recursively memoizes the hashes of all nodes of the subtrie that has + /// `self` as root (post-order traversal) pub fn memoize_hashes(&self) { match self { Node::Branch(n) => { for child in &n.choices { - match child { - NodeRef::Node(node, hash) if hash.get().is_none() => { - node.memoize_hashes(); - let _ = hash.set(node.compute_hash()); - } - _ => {} - } + child.memoize_hashes(); } } - Node::Extension(n) => match &n.child { - NodeRef::Node(node, hash) if hash.get().is_none() => { - node.memoize_hashes(); - let _ = hash.set(node.compute_hash()); - } - _ => {} - }, + Node::Extension(n) => n.child.memoize_hashes(), _ => {} } } diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index f9cf6e0dde0..6869af3c588 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -223,7 +223,7 @@ impl BranchNode { // in a state or storage trie. // plus a 3 byte headroom for the first prefix and payload len // plus a byte for the empty value - const MAX_RLP_SIZE: usize = 16*33 + 3 + 1; + const MAX_RLP_SIZE: usize = 16 * 33 + 3 + 1; let mut buf: Vec = Vec::with_capacity(MAX_RLP_SIZE); let payload_len = self.choices.iter().fold(1, |payload_len, child| { From 5d3fa97424d0ec9de9047797b108981a2cad6c07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Tue, 30 Sep 2025 17:16:32 -0300 Subject: [PATCH 24/50] remove fast rlp --- crates/common/trie/node/branch.rs | 43 ++++++------------------------- 1 file changed, 8 insertions(+), 35 deletions(-) diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 6869af3c588..26f88c28530 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -1,9 +1,6 @@ -use std::io::Write; - -use digest::core_api::CoreWrapper; use ethereum_types::H256; use ethrex_rlp::structs::Encoder; -use sha3::{Digest, Keccak256, Keccak256Core}; +use sha3::{Digest, Keccak256}; use crate::{TrieDB, ValueRLP, error::TrieError, nibbles::Nibbles, node_hash::NodeHash}; @@ -215,9 +212,6 @@ impl BranchNode { } /// Encodes the node - /// Assumptions: - /// - No value - /// - 32 byte choices pub fn encode_raw(&self) -> Vec { // 16 items * 33 bytes, assuming branches don't have values // in a state or storage trie. @@ -225,41 +219,20 @@ impl BranchNode { // plus a byte for the empty value const MAX_RLP_SIZE: usize = 16 * 33 + 3 + 1; let mut buf: Vec = Vec::with_capacity(MAX_RLP_SIZE); - - let payload_len = self.choices.iter().fold(1, |payload_len, child| { - payload_len + if child.is_valid() { 33 } else { 1 } - }); - if payload_len < 56 { - buf.push(0xc0 + payload_len as u8); - } else if payload_len < u8::MAX as usize { - buf.extend([0xf8, payload_len as u8]); - } else { - buf.extend([ - 0xf9, - ((payload_len as u16) >> 8) as u8, - (payload_len & 0xff) as u8, - ]); - } + let mut encoder = Encoder::new(&mut buf); for child in self.choices.iter() { match child.compute_hash() { - NodeHash::Hashed(hash) => { - buf.push(0xa0); - buf.extend(hash.0); - } - NodeHash::Inline(raw) if raw.1 != 0 => { - unreachable!(); - // buf.push(0x80 + raw.1); - // buf.extend_from_slice(&raw.0[..raw.1 as usize]); - } - _ => { - buf.push(0x80); + NodeHash::Hashed(hash) => encoder = encoder.encode_bytes(&hash.0), + child @ NodeHash::Inline(raw) if raw.1 != 0 => { + encoder = encoder.encode_raw(child.as_ref()) } + _ => encoder = encoder.encode_bytes(&[]), } } - // branch's value is empty - buf.push(0x80); + encoder = encoder.encode_bytes(&self.value); + encoder.finish(); buf } From edf7050d87aaf9d2e9fd13bb9b57fac36e65a338 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Tue, 30 Sep 2025 17:48:12 -0300 Subject: [PATCH 25/50] dont use encoder --- crates/common/trie/node/branch.rs | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 26f88c28530..1d9868ef1aa 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -1,6 +1,4 @@ -use ethereum_types::H256; -use ethrex_rlp::structs::Encoder; -use sha3::{Digest, Keccak256}; +use ethrex_rlp::{encode::RLPEncode, structs::Encoder}; use crate::{TrieDB, ValueRLP, error::TrieError, nibbles::Nibbles, node_hash::NodeHash}; @@ -205,34 +203,29 @@ impl BranchNode { /// Computes the node's hash pub fn compute_hash(&self) -> NodeHash { - let mut hasher = Keccak256::new(); - hasher.update(&self.encode_raw()); - let hash = hasher.finalize(); - NodeHash::Hashed(H256::from_slice(&hash)) + NodeHash::from_encoded_raw(&self.encode_raw()) } /// Encodes the node pub fn encode_raw(&self) -> Vec { - // 16 items * 33 bytes, assuming branches don't have values - // in a state or storage trie. - // plus a 3 byte headroom for the first prefix and payload len - // plus a byte for the empty value - const MAX_RLP_SIZE: usize = 16 * 33 + 3 + 1; - let mut buf: Vec = Vec::with_capacity(MAX_RLP_SIZE); - let mut encoder = Encoder::new(&mut buf); + // 3 byte payload prefix + // + 16 choices * 33 bytes + // + 3 bytes value prefix + // + value + let max_rlp_size: usize = 3 + 16 * 33 + 3 + self.value.len(); + let mut buf: Vec = Vec::with_capacity(max_rlp_size); for child in self.choices.iter() { match child.compute_hash() { - NodeHash::Hashed(hash) => encoder = encoder.encode_bytes(&hash.0), + NodeHash::Hashed(hash) => hash.0.encode(&mut buf), child @ NodeHash::Inline(raw) if raw.1 != 0 => { - encoder = encoder.encode_raw(child.as_ref()) + buf.extend(child.as_ref()); } - _ => encoder = encoder.encode_bytes(&[]), + _ => [].encode(&mut buf), } } - encoder = encoder.encode_bytes(&self.value); - encoder.finish(); + self.value.encode(&mut buf); buf } From 44035c4c9794bfbbb96edfe96bf69da7fdfa5fb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Tue, 30 Sep 2025 18:31:02 -0300 Subject: [PATCH 26/50] update len --- crates/common/rlp/encode.rs | 18 ++++++++++++++++++ crates/common/trie/node/branch.rs | 27 ++++++++++++++++++++------- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/crates/common/rlp/encode.rs b/crates/common/rlp/encode.rs index 1de929a8b9d..f3a75c1ce78 100644 --- a/crates/common/rlp/encode.rs +++ b/crates/common/rlp/encode.rs @@ -201,6 +201,24 @@ pub fn encode_length(total_len: usize, buf: &mut dyn BufMut) { } } +pub fn encoded_prefix_bytes(total_len: usize) -> usize { + const U8_MAX_PLUS_ONE: usize = u8::MAX as usize + 1; + const U16_MAX_PLUS_ONE: usize = u16::MAX as usize + 1; + match total_len { + 0..56 => 1, + 56..U8_MAX_PLUS_ONE => 2, + U8_MAX_PLUS_ONE..=U16_MAX_PLUS_ONE => 3, + _ => { + usize::BITS as usize / 8 + - total_len + .to_be_bytes() + .iter() + .position(|&x| x != 0) + .unwrap() + } + } +} + impl RLPEncode for (S, T) { fn encode(&self, buf: &mut dyn BufMut) { let total_len = self.0.length() + self.1.length(); diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 1d9868ef1aa..a5fb2447df5 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -1,4 +1,8 @@ -use ethrex_rlp::{encode::RLPEncode, structs::Encoder}; +use ethrex_rlp::{ + encode::{RLPEncode, encode_length, encoded_prefix_bytes}, + structs::Encoder, +}; +use serde::de::value; use crate::{TrieDB, ValueRLP, error::TrieError, nibbles::Nibbles, node_hash::NodeHash}; @@ -208,12 +212,21 @@ impl BranchNode { /// Encodes the node pub fn encode_raw(&self) -> Vec { - // 3 byte payload prefix - // + 16 choices * 33 bytes - // + 3 bytes value prefix - // + value - let max_rlp_size: usize = 3 + 16 * 33 + 3 + self.value.len(); - let mut buf: Vec = Vec::with_capacity(max_rlp_size); + let payload_len = { + // TODO: choices may be inline + let choices_len = self.choices.iter().fold(0, |payload_len, child| { + payload_len + if child.is_valid() { + // 32 bytes + 1 byte prefix + 33 + } else { + // 1 byte RLP_NULL + 1 + } + }); + let value_prefix_len = encoded_prefix_bytes(self.value.len()); + choices_len + value_prefix_len + self.value.len() + }; + let mut buf: Vec = Vec::with_capacity(payload_len + 3); // 3 byte prefix headroom for child in self.choices.iter() { match child.compute_hash() { From 4f0e0366f9fe8b99f472b034cadab5709aede00c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Tue, 30 Sep 2025 18:42:47 -0300 Subject: [PATCH 27/50] remove inline assumption --- crates/common/rlp/encode.rs | 14 +++++++------- crates/common/trie/node.rs | 7 +++++++ crates/common/trie/node/branch.rs | 29 +++++++++++++++-------------- 3 files changed, 29 insertions(+), 21 deletions(-) diff --git a/crates/common/rlp/encode.rs b/crates/common/rlp/encode.rs index f3a75c1ce78..0fcb604f79c 100644 --- a/crates/common/rlp/encode.rs +++ b/crates/common/rlp/encode.rs @@ -207,14 +207,14 @@ pub fn encoded_prefix_bytes(total_len: usize) -> usize { match total_len { 0..56 => 1, 56..U8_MAX_PLUS_ONE => 2, - U8_MAX_PLUS_ONE..=U16_MAX_PLUS_ONE => 3, + U8_MAX_PLUS_ONE..U16_MAX_PLUS_ONE => 3, _ => { - usize::BITS as usize / 8 - - total_len - .to_be_bytes() - .iter() - .position(|&x| x != 0) - .unwrap() + let leading_zeros = total_len + .to_be_bytes() + .iter() + .position(|&x| x != 0) + .unwrap(); + usize::BITS as usize / 8 - leading_zeros } } } diff --git a/crates/common/trie/node.rs b/crates/common/trie/node.rs index 33face0b327..ddc9fd806b4 100644 --- a/crates/common/trie/node.rs +++ b/crates/common/trie/node.rs @@ -85,6 +85,13 @@ impl NodeRef { } } + pub fn compute_hash_ref<'a>(&'a self) -> &'a NodeHash { + match self { + NodeRef::Node(node, hash) => hash.get_or_init(|| node.compute_hash()), + NodeRef::Hash(hash) => hash, + } + } + pub fn memoize_hashes(&self) { if let NodeRef::Node(node, hash) = &self { if hash.get().is_none() { diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index a5fb2447df5..0a00b6d540e 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -211,25 +211,26 @@ impl BranchNode { } /// Encodes the node + /// + /// Branch encoding is optimized because it's the majoritarian node type in a trie. pub fn encode_raw(&self) -> Vec { - let payload_len = { - // TODO: choices may be inline - let choices_len = self.choices.iter().fold(0, |payload_len, child| { - payload_len + if child.is_valid() { - // 32 bytes + 1 byte prefix - 33 - } else { - // 1 byte RLP_NULL - 1 + let value_len = encoded_prefix_bytes(self.value.len()) + self.value.len(); + let mut choices_len = 0; + for child in &self.choices { + match child.compute_hash_ref() { + NodeHash::Hashed(_) => choices_len += 33, + NodeHash::Inline(raw) if raw.1 != 0 => { + choices_len += encoded_prefix_bytes(raw.1 as usize); + choices_len += raw.1 as usize } - }); - let value_prefix_len = encoded_prefix_bytes(self.value.len()); - choices_len + value_prefix_len + self.value.len() - }; + _ => choices_len += 1, + } + } + let payload_len = choices_len + value_len; let mut buf: Vec = Vec::with_capacity(payload_len + 3); // 3 byte prefix headroom for child in self.choices.iter() { - match child.compute_hash() { + match child.compute_hash_ref() { NodeHash::Hashed(hash) => hash.0.encode(&mut buf), child @ NodeHash::Inline(raw) if raw.1 != 0 => { buf.extend(child.as_ref()); From 3dca224a46d906de09d41bb0ca3b075d2a15f467 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Tue, 30 Sep 2025 18:48:58 -0300 Subject: [PATCH 28/50] add paylaod length encoding, comments --- crates/common/trie/node/branch.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 0a00b6d540e..0311745f9cf 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -211,14 +211,14 @@ impl BranchNode { } /// Encodes the node - /// - /// Branch encoding is optimized because it's the majoritarian node type in a trie. pub fn encode_raw(&self) -> Vec { + // length of `value` payload let value_len = encoded_prefix_bytes(self.value.len()) + self.value.len(); + // length of `choices` payload let mut choices_len = 0; for child in &self.choices { match child.compute_hash_ref() { - NodeHash::Hashed(_) => choices_len += 33, + NodeHash::Hashed(_) => choices_len += 33, // 1 byte prefix + 32 bytes NodeHash::Inline(raw) if raw.1 != 0 => { choices_len += encoded_prefix_bytes(raw.1 as usize); choices_len += raw.1 as usize @@ -226,9 +226,14 @@ impl BranchNode { _ => choices_len += 1, } } + // total payload len let payload_len = choices_len + value_len; + let mut buf: Vec = Vec::with_capacity(payload_len + 3); // 3 byte prefix headroom + // encode payload prefix + encode_length(payload_len, &mut buf); + // encode choices for child in self.choices.iter() { match child.compute_hash_ref() { NodeHash::Hashed(hash) => hash.0.encode(&mut buf), @@ -238,7 +243,7 @@ impl BranchNode { _ => [].encode(&mut buf), } } - + // encode value self.value.encode(&mut buf); buf From 86ae6a08e5272eac53f3c7f4ada8772384d78890 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Wed, 1 Oct 2025 10:19:49 -0300 Subject: [PATCH 29/50] simplify branch encoding --- crates/common/rlp/encode.rs | 17 +++++++++------- crates/common/trie/node/branch.rs | 33 +++++++------------------------ crates/common/trie/node_hash.rs | 9 ++++++++- 3 files changed, 25 insertions(+), 34 deletions(-) diff --git a/crates/common/rlp/encode.rs b/crates/common/rlp/encode.rs index 0fcb604f79c..1c8d86e6c05 100644 --- a/crates/common/rlp/encode.rs +++ b/crates/common/rlp/encode.rs @@ -201,20 +201,23 @@ pub fn encode_length(total_len: usize, buf: &mut dyn BufMut) { } } -pub fn encoded_prefix_bytes(total_len: usize) -> usize { +pub fn encoded_length(bytes: &[u8]) -> usize { const U8_MAX_PLUS_ONE: usize = u8::MAX as usize + 1; const U16_MAX_PLUS_ONE: usize = u16::MAX as usize + 1; - match total_len { - 0..56 => 1, - 56..U8_MAX_PLUS_ONE => 2, - U8_MAX_PLUS_ONE..U16_MAX_PLUS_ONE => 3, + + match bytes.len() { + 0 => 1, + 1 if bytes[0] < 0x80 => 1, + 1..56 => 1 + bytes.len(), + 56..U8_MAX_PLUS_ONE => 2 + bytes.len(), + U8_MAX_PLUS_ONE..U16_MAX_PLUS_ONE => 3 + bytes.len(), _ => { - let leading_zeros = total_len + let leading_zeros = bytes.len() .to_be_bytes() .iter() .position(|&x| x != 0) .unwrap(); - usize::BITS as usize / 8 - leading_zeros + usize::BITS as usize / 8 - leading_zeros + bytes.len() } } } diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 0311745f9cf..9952c9bd8cf 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -1,5 +1,5 @@ use ethrex_rlp::{ - encode::{RLPEncode, encode_length, encoded_prefix_bytes}, + encode::{RLPEncode, encode_length, encoded_length}, structs::Encoder, }; use serde::de::value; @@ -212,38 +212,19 @@ impl BranchNode { /// Encodes the node pub fn encode_raw(&self) -> Vec { - // length of `value` payload - let value_len = encoded_prefix_bytes(self.value.len()) + self.value.len(); - // length of `choices` payload - let mut choices_len = 0; - for child in &self.choices { - match child.compute_hash_ref() { - NodeHash::Hashed(_) => choices_len += 33, // 1 byte prefix + 32 bytes - NodeHash::Inline(raw) if raw.1 != 0 => { - choices_len += encoded_prefix_bytes(raw.1 as usize); - choices_len += raw.1 as usize - } - _ => choices_len += 1, - } - } - // total payload len + let value_len = encoded_length(&self.value); + let choices_len = self + .choices + .iter() + .fold(0, |acc, child| acc + child.compute_hash_ref().encoded_len()); let payload_len = choices_len + value_len; let mut buf: Vec = Vec::with_capacity(payload_len + 3); // 3 byte prefix headroom - // encode payload prefix encode_length(payload_len, &mut buf); - // encode choices for child in self.choices.iter() { - match child.compute_hash_ref() { - NodeHash::Hashed(hash) => hash.0.encode(&mut buf), - child @ NodeHash::Inline(raw) if raw.1 != 0 => { - buf.extend(child.as_ref()); - } - _ => [].encode(&mut buf), - } + child.compute_hash_ref().as_ref().encode(&mut buf); } - // encode value self.value.encode(&mut buf); buf diff --git a/crates/common/trie/node_hash.rs b/crates/common/trie/node_hash.rs index 49b99710bc9..6a14d961338 100644 --- a/crates/common/trie/node_hash.rs +++ b/crates/common/trie/node_hash.rs @@ -1,5 +1,5 @@ use ethereum_types::H256; -use ethrex_rlp::{decode::RLPDecode, encode::RLPEncode, error::RLPDecodeError, structs::Encoder}; +use ethrex_rlp::{decode::RLPDecode, encode::{RLPEncode, encoded_length}, error::RLPDecodeError, structs::Encoder}; #[cfg(feature = "libmdbx")] use libmdbx::orm::{Decodable, Encodable}; use sha3::{Digest, Keccak256}; @@ -90,6 +90,13 @@ impl NodeHash { } } + pub fn encoded_len(&self) -> usize { + match self { + NodeHash::Hashed(_) => 33, // 1 byte prefix + 32 bytes + NodeHash::Inline(raw) => encoded_length(&raw.0), + } + } + pub fn is_empty(&self) -> bool { match self { NodeHash::Hashed(h256) => h256.as_bytes().is_empty(), From c2ed4d0956bf8a3251111058ca63df9d6eeaf5fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Wed, 1 Oct 2025 10:49:46 -0300 Subject: [PATCH 30/50] fix encoded_length() --- crates/common/rlp/encode.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/crates/common/rlp/encode.rs b/crates/common/rlp/encode.rs index 1c8d86e6c05..9f0fc421b2f 100644 --- a/crates/common/rlp/encode.rs +++ b/crates/common/rlp/encode.rs @@ -209,15 +209,11 @@ pub fn encoded_length(bytes: &[u8]) -> usize { 0 => 1, 1 if bytes[0] < 0x80 => 1, 1..56 => 1 + bytes.len(), - 56..U8_MAX_PLUS_ONE => 2 + bytes.len(), - U8_MAX_PLUS_ONE..U16_MAX_PLUS_ONE => 3 + bytes.len(), + 56..U8_MAX_PLUS_ONE => 1 + 1 + bytes.len(), + U8_MAX_PLUS_ONE..U16_MAX_PLUS_ONE => 1 + 2 + bytes.len(), _ => { - let leading_zeros = bytes.len() - .to_be_bytes() - .iter() - .position(|&x| x != 0) - .unwrap(); - usize::BITS as usize / 8 - leading_zeros + bytes.len() + let len_bytes = (usize::BITS - bytes.len().leading_zeros()) as usize / 8; + 1 + len_bytes + bytes.len() } } } From 1df875c3bf4ef667d17e3dcce9d1030f325eacbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Wed, 1 Oct 2025 11:03:09 -0300 Subject: [PATCH 31/50] nit --- crates/common/trie/node_hash.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/common/trie/node_hash.rs b/crates/common/trie/node_hash.rs index 6a14d961338..6ce902ec37d 100644 --- a/crates/common/trie/node_hash.rs +++ b/crates/common/trie/node_hash.rs @@ -1,5 +1,5 @@ use ethereum_types::H256; -use ethrex_rlp::{decode::RLPDecode, encode::{RLPEncode, encoded_length}, error::RLPDecodeError, structs::Encoder}; +use ethrex_rlp::{decode::RLPDecode, encode::RLPEncode, error::RLPDecodeError, structs::Encoder}; #[cfg(feature = "libmdbx")] use libmdbx::orm::{Decodable, Encodable}; use sha3::{Digest, Keccak256}; @@ -92,8 +92,8 @@ impl NodeHash { pub fn encoded_len(&self) -> usize { match self { - NodeHash::Hashed(_) => 33, // 1 byte prefix + 32 bytes - NodeHash::Inline(raw) => encoded_length(&raw.0), + NodeHash::Hashed(_) => 33, // 1 byte prefix + 32 bytes + NodeHash::Inline(raw) => 1 + raw.1 as usize, // 1 byte prefix + payload length, } } From 538c2ab6fdde82db33f86adfac5953668e8e9063 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Wed, 1 Oct 2025 11:06:08 -0300 Subject: [PATCH 32/50] revert leaf and extension quick rlp --- crates/common/trie/node/extension.rs | 54 +------------------- crates/common/trie/node/leaf.rs | 74 +--------------------------- 2 files changed, 2 insertions(+), 126 deletions(-) diff --git a/crates/common/trie/node/extension.rs b/crates/common/trie/node/extension.rs index 49e2c302e2f..74abfa6a921 100644 --- a/crates/common/trie/node/extension.rs +++ b/crates/common/trie/node/extension.rs @@ -1,8 +1,4 @@ -use std::io::Write; - -use ethereum_types::H256; use ethrex_rlp::structs::Encoder; -use sha3::{Digest, Keccak256}; use crate::ValueRLP; use crate::nibbles::Nibbles; @@ -148,55 +144,7 @@ impl ExtensionNode { /// Computes the node's hash pub fn compute_hash(&self) -> NodeHash { - let mut hasher = Keccak256::new(); - self.encode_write(&mut hasher); - let hash = hasher.finalize(); - NodeHash::Hashed(H256::from_slice(&hash)) - } - - /// Encodes the node - pub fn encode_write(&self, buf: &mut impl Write) { - let prefix_encoded = self.prefix.encode_compact(); - - // calc total payload len - // ASSUMPTION: there are no inline node refs, so child len is 1 + 32 bytes - // ASSUMPTION: prefix is never greater than 55 bytes (in particular it's at most 32 bytes) - let payload_len = { - let prefix_len = if prefix_encoded.len() == 1 && prefix_encoded[0] < 0x80 { - 1 - } else { - 1 + prefix_encoded.len() - }; - prefix_len + 33 - }; - - // write payload prefix - if payload_len < 56 { - buf.write(&[0xc0 + payload_len as u8]); - } else if payload_len < u8::MAX as usize { - buf.write(&[0xf8, payload_len as u8]); - } else { - // ASSUMPTION: list len will never be greater than (1 + 32) * 2 = 66 bytes - unreachable!(); - } - - // write prefix prefix - if prefix_encoded.len() == 1 && prefix_encoded[0] < 0x80 { - // value is its own encoding - } else { - // ASSUMPTION: prefix is never greater than 55 bytes (in particular it's at most 32 bytes) - buf.write(&[0x80 + prefix_encoded.len() as u8]); - } - // write prefix - buf.write(&prefix_encoded); - - // write child hash - let NodeHash::Hashed(child_hash) = self.child.compute_hash() else { - // ASSUMPTION: there are no inline node refs - unreachable!(); - }; - buf.write(&[0xa0]); - buf.write(&child_hash.0); + NodeHash::from_encoded_raw(&self.encode_raw()) } /// Encodes the node diff --git a/crates/common/trie/node/leaf.rs b/crates/common/trie/node/leaf.rs index f110a7da5c3..8629fc90f70 100644 --- a/crates/common/trie/node/leaf.rs +++ b/crates/common/trie/node/leaf.rs @@ -1,8 +1,4 @@ -use std::io::Write; - -use ethereum_types::H256; use ethrex_rlp::structs::Encoder; -use sha3::{Digest, Keccak256}; use crate::{ValueRLP, error::TrieError, nibbles::Nibbles, node::BranchNode, node_hash::NodeHash}; @@ -118,75 +114,7 @@ impl LeafNode { /// Computes the node's hash pub fn compute_hash(&self) -> NodeHash { - let mut hasher = Keccak256::new(); - self.encode_write(&mut hasher); - let hash = hasher.finalize(); - NodeHash::Hashed(H256::from_slice(&hash)) - } - - /// Encodes the node - pub fn encode_write(&self, buf: &mut impl Write) { - let partial_encoded = self.partial.encode_compact(); - - // calc total payload len - let payload_len = { - // ASSUMPTION: partial is never greater than 55 bytes (in particular it's at most 32 bytes) - let partial_len = if partial_encoded.len() == 1 && partial_encoded[0] < 0x80 { - 1 - } else { - 1 + partial_encoded.len() - }; - let value_prefix_len = match self.value.len() { - 1 if self.value[0] < 0x80 => 0, - 1 | ..56 => 1, - 56..256 => 2, - _ => 3, // ASSUMPTION: list len will never be >u16::MAX (2 bytes len) - }; - partial_len + value_prefix_len + self.value.len() - }; - // write payload prefix - if payload_len < 56 { - buf.write(&[0xc0 + payload_len as u8]); - } else if payload_len < u8::MAX as usize { - buf.write(&[0xf8, payload_len as u8]); - } else { - // ASSUMPTION: list len will never be >u16::MAX (2 bytes len) - buf.write(&[ - 0xf9, - ((payload_len as u16) >> 8) as u8, - (payload_len & 0xff) as u8, - ]); - } - - // write partial prefix - if partial_encoded.len() == 1 && partial_encoded[0] < 0x80 { - // value is its own encoding - } else { - // ASSUMPTION: partial is never greater than 55 bytes (in particular it's at most 32 bytes) - buf.write(&[0x80 + partial_encoded.len() as u8]); - } - // write partial - buf.write(&partial_encoded); - - // write value prefix - if self.value.len() == 1 && self.value[0] < 0x80 { - // value is its own encoding - } else if self.value.len() < 56 { - buf.write(&[0x80 + self.value.len() as u8]); - } else if self.value.len() < u8::MAX as usize { - // TODO: < or <= - buf.write(&[0xb8, self.value.len() as u8]); - } else { - // ASSUMPTION: value len will never be >u16::MAX (2 bytes len) - buf.write(&[ - 0xb9, - ((self.value.len() as u16) >> 8) as u8, - (self.value.len() & 0xff) as u8, - ]); - } - - // write value - buf.write(&self.value); + NodeHash::from_encoded_raw(&self.encode_raw()) } /// Encodes the node From caadcd2fce50403e8673af249523d0343b79787b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Wed, 1 Oct 2025 11:19:26 -0300 Subject: [PATCH 33/50] inline is already encoded --- crates/common/trie/node/branch.rs | 8 ++++++-- crates/common/trie/node_hash.rs | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 9952c9bd8cf..e71fc6a3031 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -1,8 +1,7 @@ use ethrex_rlp::{ + constants::RLP_NULL, encode::{RLPEncode, encode_length, encoded_length}, - structs::Encoder, }; -use serde::de::value; use crate::{TrieDB, ValueRLP, error::TrieError, nibbles::Nibbles, node_hash::NodeHash}; @@ -223,6 +222,11 @@ impl BranchNode { encode_length(payload_len, &mut buf); for child in self.choices.iter() { + match child.compute_hash_ref() { + NodeHash::Hashed(hash) => hash.0.encode(&mut buf), + NodeHash::Inline((_, 0)) => buf.push(RLP_NULL), + child @ NodeHash::Inline(_) => buf.extend(child.as_ref()), + } child.compute_hash_ref().as_ref().encode(&mut buf); } self.value.encode(&mut buf); diff --git a/crates/common/trie/node_hash.rs b/crates/common/trie/node_hash.rs index 6ce902ec37d..91c5ed3cb2e 100644 --- a/crates/common/trie/node_hash.rs +++ b/crates/common/trie/node_hash.rs @@ -92,8 +92,8 @@ impl NodeHash { pub fn encoded_len(&self) -> usize { match self { - NodeHash::Hashed(_) => 33, // 1 byte prefix + 32 bytes - NodeHash::Inline(raw) => 1 + raw.1 as usize, // 1 byte prefix + payload length, + NodeHash::Hashed(_) => 33, // 1 byte prefix + 32 bytes + NodeHash::Inline(raw) => raw.1 as usize, // already encoded } } From deef7e093254a63f592a2079b3063e911bf605c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Wed, 1 Oct 2025 11:29:04 -0300 Subject: [PATCH 34/50] fix encoded_len, remove extra line --- crates/common/trie/node.rs | 2 +- crates/common/trie/node/branch.rs | 1 - crates/common/trie/node_hash.rs | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/crates/common/trie/node.rs b/crates/common/trie/node.rs index ddc9fd806b4..75d35175cd0 100644 --- a/crates/common/trie/node.rs +++ b/crates/common/trie/node.rs @@ -17,7 +17,7 @@ use ethrex_rlp::{ pub use extension::ExtensionNode; pub use leaf::LeafNode; -use crate::{TrieDB, error::TrieError, nibbles::Nibbles, node}; +use crate::{TrieDB, error::TrieError, nibbles::Nibbles}; use super::{ValueRLP, node_hash::NodeHash}; diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index e71fc6a3031..d89d1180f80 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -227,7 +227,6 @@ impl BranchNode { NodeHash::Inline((_, 0)) => buf.push(RLP_NULL), child @ NodeHash::Inline(_) => buf.extend(child.as_ref()), } - child.compute_hash_ref().as_ref().encode(&mut buf); } self.value.encode(&mut buf); diff --git a/crates/common/trie/node_hash.rs b/crates/common/trie/node_hash.rs index 91c5ed3cb2e..ec5f4d301f9 100644 --- a/crates/common/trie/node_hash.rs +++ b/crates/common/trie/node_hash.rs @@ -92,8 +92,8 @@ impl NodeHash { pub fn encoded_len(&self) -> usize { match self { - NodeHash::Hashed(_) => 33, // 1 byte prefix + 32 bytes - NodeHash::Inline(raw) => raw.1 as usize, // already encoded + NodeHash::Hashed(_) => 33, // 1 byte prefix + 32 bytes + NodeHash::Inline((_, len)) => (*len as usize).min(1), // already encoded, if empty then it's encoded to RLP_NULL } } From cd19ec5467c15bcf236a5f8cdf83512462f70996 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Wed, 1 Oct 2025 11:30:09 -0300 Subject: [PATCH 35/50] remove explicit lifetime --- crates/common/trie/node.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/common/trie/node.rs b/crates/common/trie/node.rs index 75d35175cd0..64c445271eb 100644 --- a/crates/common/trie/node.rs +++ b/crates/common/trie/node.rs @@ -85,7 +85,7 @@ impl NodeRef { } } - pub fn compute_hash_ref<'a>(&'a self) -> &'a NodeHash { + pub fn compute_hash_ref(&self) -> &NodeHash { match self { NodeRef::Node(node, hash) => hash.get_or_init(|| node.compute_hash()), NodeRef::Hash(hash) => hash, From 4b0af4bde82907b00a6c78d0365ca06c789f6a03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Wed, 1 Oct 2025 11:35:43 -0300 Subject: [PATCH 36/50] change min to max' --- crates/common/trie/node_hash.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/common/trie/node_hash.rs b/crates/common/trie/node_hash.rs index ec5f4d301f9..7c5ec3f9c1c 100644 --- a/crates/common/trie/node_hash.rs +++ b/crates/common/trie/node_hash.rs @@ -93,7 +93,7 @@ impl NodeHash { pub fn encoded_len(&self) -> usize { match self { NodeHash::Hashed(_) => 33, // 1 byte prefix + 32 bytes - NodeHash::Inline((_, len)) => (*len as usize).min(1), // already encoded, if empty then it's encoded to RLP_NULL + NodeHash::Inline((_, len)) => (*len as usize).max(1), // already encoded, if empty then it's encoded to RLP_NULL } } From 90b17c25c9bc87ec456e86c4cc724853a161cbbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Wed, 1 Oct 2025 11:40:31 -0300 Subject: [PATCH 37/50] clearer code --- crates/common/trie/node/branch.rs | 2 +- crates/common/trie/node_hash.rs | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index d89d1180f80..c39964aa812 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -225,7 +225,7 @@ impl BranchNode { match child.compute_hash_ref() { NodeHash::Hashed(hash) => hash.0.encode(&mut buf), NodeHash::Inline((_, 0)) => buf.push(RLP_NULL), - child @ NodeHash::Inline(_) => buf.extend(child.as_ref()), + NodeHash::Inline((encoded, len)) => buf.extend(&encoded[..*len as usize]), } } self.value.encode(&mut buf); diff --git a/crates/common/trie/node_hash.rs b/crates/common/trie/node_hash.rs index 7c5ec3f9c1c..d2e4fda2fa3 100644 --- a/crates/common/trie/node_hash.rs +++ b/crates/common/trie/node_hash.rs @@ -92,8 +92,9 @@ impl NodeHash { pub fn encoded_len(&self) -> usize { match self { - NodeHash::Hashed(_) => 33, // 1 byte prefix + 32 bytes - NodeHash::Inline((_, len)) => (*len as usize).max(1), // already encoded, if empty then it's encoded to RLP_NULL + NodeHash::Hashed(_) => 33, // 1 byte prefix + 32 bytes + NodeHash::Inline((_, 0)) => 1, // if empty then it's encoded to RLP_NULL + NodeHash::Inline((_, len)) => *len as usize, // already encoded } } From 85e88aeab3414baa37cc9d4a71c317e48398bd8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Wed, 1 Oct 2025 12:01:24 -0300 Subject: [PATCH 38/50] fix value encoding --- crates/common/trie/node/branch.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index c39964aa812..541e68b332b 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -225,10 +225,10 @@ impl BranchNode { match child.compute_hash_ref() { NodeHash::Hashed(hash) => hash.0.encode(&mut buf), NodeHash::Inline((_, 0)) => buf.push(RLP_NULL), - NodeHash::Inline((encoded, len)) => buf.extend(&encoded[..*len as usize]), + NodeHash::Inline((encoded, len)) => buf.extend_from_slice(&encoded[..*len as usize]), } } - self.value.encode(&mut buf); + <[u8] as RLPEncode>::encode(&self.value, &mut buf); buf } From 8bd9ca3c61b59424d592d43c793bcf162be478c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Wed, 1 Oct 2025 12:35:12 -0300 Subject: [PATCH 39/50] remove new_with_capacity --- crates/common/rlp/structs.rs | 10 ---------- crates/common/trie/node/branch.rs | 4 +++- crates/common/trie/node/extension.rs | 7 ++----- crates/common/trie/node/leaf.rs | 8 ++------ 4 files changed, 7 insertions(+), 22 deletions(-) diff --git a/crates/common/rlp/structs.rs b/crates/common/rlp/structs.rs index 68a31ccd30b..d493389c8f1 100644 --- a/crates/common/rlp/structs.rs +++ b/crates/common/rlp/structs.rs @@ -183,16 +183,6 @@ impl<'a> Encoder<'a> { } } - /// Creates a new encoder that writes to the given buffer. - /// - /// Preallocates the temp buffer for `capacity` bytes. - pub fn new_with_capacity(buf: &'a mut dyn BufMut, capacity: usize) -> Self { - Self { - buf, - temp_buf: Vec::with_capacity(capacity), - } - } - /// Stores a field to be encoded. pub fn encode_field(mut self, value: &T) -> Self { ::encode(value, &mut self.temp_buf); diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 541e68b332b..22bef221ce6 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -225,7 +225,9 @@ impl BranchNode { match child.compute_hash_ref() { NodeHash::Hashed(hash) => hash.0.encode(&mut buf), NodeHash::Inline((_, 0)) => buf.push(RLP_NULL), - NodeHash::Inline((encoded, len)) => buf.extend_from_slice(&encoded[..*len as usize]), + NodeHash::Inline((encoded, len)) => { + buf.extend_from_slice(&encoded[..*len as usize]) + } } } <[u8] as RLPEncode>::encode(&self.value, &mut buf); diff --git a/crates/common/trie/node/extension.rs b/crates/common/trie/node/extension.rs index 74abfa6a921..85bce8662a0 100644 --- a/crates/common/trie/node/extension.rs +++ b/crates/common/trie/node/extension.rs @@ -149,11 +149,8 @@ impl ExtensionNode { /// Encodes the node pub fn encode_raw(&self) -> Vec { - // (2 items) * 32 bytes, prefix and child (hash) - const RLP_ENCODED_SIZE: usize = 64; - let mut buf = Vec::with_capacity(RLP_ENCODED_SIZE); - let mut encoder = Encoder::new_with_capacity(&mut buf, RLP_ENCODED_SIZE) - .encode_bytes(&self.prefix.encode_compact()); + let mut buf = Vec::new(); + let mut encoder = Encoder::new(&mut buf).encode_bytes(&self.prefix.encode_compact()); encoder = self.child.compute_hash().encode(encoder); encoder.finish(); buf diff --git a/crates/common/trie/node/leaf.rs b/crates/common/trie/node/leaf.rs index 8629fc90f70..e38096d6f5e 100644 --- a/crates/common/trie/node/leaf.rs +++ b/crates/common/trie/node/leaf.rs @@ -119,12 +119,8 @@ impl LeafNode { /// Encodes the node pub fn encode_raw(&self) -> Vec { - // (1 items + 4 items) * 32 bytes, assuming worst case in which it contains an - // ethereum account (4 item RLP) and a 32 byte partial - const RLP_ENCODED_SIZE: usize = 160; - - let mut buf = Vec::with_capacity(RLP_ENCODED_SIZE); - Encoder::new_with_capacity(&mut buf, RLP_ENCODED_SIZE) + let mut buf = Vec::new(); + Encoder::new(&mut buf) .encode_bytes(&self.partial.encode_compact()) .encode_bytes(&self.value) .finish(); From 98da8c19cdf2c3b34ff8e5d546ab0d2c9438afb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Wed, 1 Oct 2025 16:04:07 -0300 Subject: [PATCH 40/50] prettify --- crates/common/rlp/encode.rs | 35 ++++++++++++++++--------------- crates/common/trie/node.rs | 5 +---- crates/common/trie/node/branch.rs | 11 +++++----- crates/common/trie/node_hash.rs | 16 +++++++------- 4 files changed, 32 insertions(+), 35 deletions(-) diff --git a/crates/common/rlp/encode.rs b/crates/common/rlp/encode.rs index 9f0fc421b2f..dd9b58c2212 100644 --- a/crates/common/rlp/encode.rs +++ b/crates/common/rlp/encode.rs @@ -139,6 +139,24 @@ impl RLPEncode for [u8] { buf.put_slice(self); } } + + fn length(&self) -> usize { + const U8_MAX_PLUS_ONE: usize = u8::MAX as usize + 1; + const U16_MAX_PLUS_ONE: usize = u16::MAX as usize + 1; + + match self.len() { + 0 => 1, // encodes to RLP_NULL + 1 if self[0] < 0x80 => 1, // `self` is its own encoding + 1..56 => 1 + self.len(), // single byte prefix + 56..U8_MAX_PLUS_ONE => 1 + 1 + self.len(), // single byte prefix + payload len bytes + U8_MAX_PLUS_ONE..U16_MAX_PLUS_ONE => 1 + 2 + self.len(), // single byte prefix + payload len bytes + _ => { + // fallback if `self` is longer than 2^16 - 1 bytes + let payload_len_bytes = (usize::BITS - self.len().leading_zeros()) as usize / 8; + 1 + payload_len_bytes + self.len() + } + } + } } impl RLPEncode for [u8; N] { @@ -201,23 +219,6 @@ pub fn encode_length(total_len: usize, buf: &mut dyn BufMut) { } } -pub fn encoded_length(bytes: &[u8]) -> usize { - const U8_MAX_PLUS_ONE: usize = u8::MAX as usize + 1; - const U16_MAX_PLUS_ONE: usize = u16::MAX as usize + 1; - - match bytes.len() { - 0 => 1, - 1 if bytes[0] < 0x80 => 1, - 1..56 => 1 + bytes.len(), - 56..U8_MAX_PLUS_ONE => 1 + 1 + bytes.len(), - U8_MAX_PLUS_ONE..U16_MAX_PLUS_ONE => 1 + 2 + bytes.len(), - _ => { - let len_bytes = (usize::BITS - bytes.len().leading_zeros()) as usize / 8; - 1 + len_bytes + bytes.len() - } - } -} - impl RLPEncode for (S, T) { fn encode(&self, buf: &mut dyn BufMut) { let total_len = self.0.length() + self.1.length(); diff --git a/crates/common/trie/node.rs b/crates/common/trie/node.rs index 64c445271eb..ea873869e64 100644 --- a/crates/common/trie/node.rs +++ b/crates/common/trie/node.rs @@ -79,10 +79,7 @@ impl NodeRef { } pub fn compute_hash(&self) -> NodeHash { - match self { - NodeRef::Node(node, hash) => *hash.get_or_init(|| node.compute_hash()), - NodeRef::Hash(hash) => *hash, - } + *self.compute_hash_ref() } pub fn compute_hash_ref(&self) -> &NodeHash { diff --git a/crates/common/trie/node/branch.rs b/crates/common/trie/node/branch.rs index 22bef221ce6..83342b0ff33 100644 --- a/crates/common/trie/node/branch.rs +++ b/crates/common/trie/node/branch.rs @@ -1,6 +1,6 @@ use ethrex_rlp::{ constants::RLP_NULL, - encode::{RLPEncode, encode_length, encoded_length}, + encode::{RLPEncode, encode_length}, }; use crate::{TrieDB, ValueRLP, error::TrieError, nibbles::Nibbles, node_hash::NodeHash}; @@ -211,11 +211,10 @@ impl BranchNode { /// Encodes the node pub fn encode_raw(&self) -> Vec { - let value_len = encoded_length(&self.value); - let choices_len = self - .choices - .iter() - .fold(0, |acc, child| acc + child.compute_hash_ref().encoded_len()); + let value_len = <[u8] as RLPEncode>::length(&self.value); + let choices_len = self.choices.iter().fold(0, |acc, child| { + acc + RLPEncode::length(child.compute_hash_ref()) + }); let payload_len = choices_len + value_len; let mut buf: Vec = Vec::with_capacity(payload_len + 3); // 3 byte prefix headroom diff --git a/crates/common/trie/node_hash.rs b/crates/common/trie/node_hash.rs index d2e4fda2fa3..87f1db7c6a7 100644 --- a/crates/common/trie/node_hash.rs +++ b/crates/common/trie/node_hash.rs @@ -90,14 +90,6 @@ impl NodeHash { } } - pub fn encoded_len(&self) -> usize { - match self { - NodeHash::Hashed(_) => 33, // 1 byte prefix + 32 bytes - NodeHash::Inline((_, 0)) => 1, // if empty then it's encoded to RLP_NULL - NodeHash::Inline((_, len)) => *len as usize, // already encoded - } - } - pub fn is_empty(&self) -> bool { match self { NodeHash::Hashed(h256) => h256.as_bytes().is_empty(), @@ -151,6 +143,14 @@ impl RLPEncode for NodeHash { fn encode(&self, buf: &mut dyn bytes::BufMut) { RLPEncode::encode(&Into::>::into(self), buf) } + + fn length(&self) -> usize { + match self { + NodeHash::Hashed(_) => 33, // 1 byte prefix + 32 bytes + NodeHash::Inline((_, 0)) => 1, // if empty then it's encoded to RLP_NULL + NodeHash::Inline((_, len)) => *len as usize, // already encoded + } + } } impl RLPDecode for NodeHash { From 7723608c8bafb6eb833b293de606de89a0efd7ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Wed, 1 Oct 2025 17:31:16 -0300 Subject: [PATCH 41/50] Update crates/common/rlp/encode.rs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- crates/common/rlp/encode.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/common/rlp/encode.rs b/crates/common/rlp/encode.rs index dd9b58c2212..4208e3e0b68 100644 --- a/crates/common/rlp/encode.rs +++ b/crates/common/rlp/encode.rs @@ -152,7 +152,7 @@ impl RLPEncode for [u8] { U8_MAX_PLUS_ONE..U16_MAX_PLUS_ONE => 1 + 2 + self.len(), // single byte prefix + payload len bytes _ => { // fallback if `self` is longer than 2^16 - 1 bytes - let payload_len_bytes = (usize::BITS - self.len().leading_zeros()) as usize / 8; + let payload_len_bytes = ((usize::BITS - self.len().leading_zeros()) as usize + 7) / 8; 1 + payload_len_bytes + self.len() } } From f9accd36e08022cf885584776899eb9547f1286a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Wed, 1 Oct 2025 17:34:46 -0300 Subject: [PATCH 42/50] Refactor payload length calculation for RLP encoding --- crates/common/rlp/encode.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/common/rlp/encode.rs b/crates/common/rlp/encode.rs index 4208e3e0b68..08807f3fed6 100644 --- a/crates/common/rlp/encode.rs +++ b/crates/common/rlp/encode.rs @@ -152,7 +152,7 @@ impl RLPEncode for [u8] { U8_MAX_PLUS_ONE..U16_MAX_PLUS_ONE => 1 + 2 + self.len(), // single byte prefix + payload len bytes _ => { // fallback if `self` is longer than 2^16 - 1 bytes - let payload_len_bytes = ((usize::BITS - self.len().leading_zeros()) as usize + 7) / 8; + let payload_len_bytes = ((usize::BITS - self.len().leading_zeros()) as usize).div_ceil(8); 1 + payload_len_bytes + self.len() } } From 44d134c2a5c21dec620c2e3b66bb6076d850fce6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Wed, 1 Oct 2025 17:38:06 -0300 Subject: [PATCH 43/50] fmt --- crates/common/rlp/encode.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/common/rlp/encode.rs b/crates/common/rlp/encode.rs index 08807f3fed6..7835955dc49 100644 --- a/crates/common/rlp/encode.rs +++ b/crates/common/rlp/encode.rs @@ -152,7 +152,8 @@ impl RLPEncode for [u8] { U8_MAX_PLUS_ONE..U16_MAX_PLUS_ONE => 1 + 2 + self.len(), // single byte prefix + payload len bytes _ => { // fallback if `self` is longer than 2^16 - 1 bytes - let payload_len_bytes = ((usize::BITS - self.len().leading_zeros()) as usize).div_ceil(8); + let payload_len_bytes = + ((usize::BITS - self.len().leading_zeros()) as usize).div_ceil(8); 1 + payload_len_bytes + self.len() } } From 910d3cb9c844d2fd7c5ae2e374fc52eecdcfc86b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Wed, 1 Oct 2025 18:54:12 -0300 Subject: [PATCH 44/50] Refactor buffer initialization in encode_raw method --- crates/common/trie/node/extension.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/common/trie/node/extension.rs b/crates/common/trie/node/extension.rs index 85bce8662a0..9878a6f5b47 100644 --- a/crates/common/trie/node/extension.rs +++ b/crates/common/trie/node/extension.rs @@ -149,7 +149,7 @@ impl ExtensionNode { /// Encodes the node pub fn encode_raw(&self) -> Vec { - let mut buf = Vec::new(); + let mut buf = vec![]; let mut encoder = Encoder::new(&mut buf).encode_bytes(&self.prefix.encode_compact()); encoder = self.child.compute_hash().encode(encoder); encoder.finish(); From 4b9209a6813ffc346eb73ee3c2a0437684798de7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Tue, 21 Oct 2025 17:02:02 -0300 Subject: [PATCH 45/50] clippy --- crates/common/trie/node.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/crates/common/trie/node.rs b/crates/common/trie/node.rs index b87d624b6bb..8d2777365c7 100644 --- a/crates/common/trie/node.rs +++ b/crates/common/trie/node.rs @@ -95,11 +95,9 @@ impl NodeRef { } pub fn memoize_hashes(&self) { - if let NodeRef::Node(node, hash) = &self { - if hash.get().is_none() { - node.memoize_hashes(); - let _ = hash.set(node.compute_hash()); - } + if let NodeRef::Node(node, hash) = &self && hash.get().is_none() { + node.memoize_hashes(); + let _ = hash.set(node.compute_hash()); } } } From 2a1bab816432a34eb789d774880eb80ca50142b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Thu, 23 Oct 2025 15:50:14 -0300 Subject: [PATCH 46/50] fmt --- crates/common/trie/node.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/common/trie/node.rs b/crates/common/trie/node.rs index 8d2777365c7..40ddf0bd00a 100644 --- a/crates/common/trie/node.rs +++ b/crates/common/trie/node.rs @@ -95,7 +95,9 @@ impl NodeRef { } pub fn memoize_hashes(&self) { - if let NodeRef::Node(node, hash) = &self && hash.get().is_none() { + if let NodeRef::Node(node, hash) = &self + && hash.get().is_none() + { node.memoize_hashes(); let _ = hash.set(node.compute_hash()); } From 42e638dca46f74af1d1a92594c9a420984811adb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Thu, 30 Oct 2025 11:36:28 -0300 Subject: [PATCH 47/50] implement encode_to_vec for branch node --- crates/common/trie/rlp.rs | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/crates/common/trie/rlp.rs b/crates/common/trie/rlp.rs index 4a02dec9a5a..80d8afd821c 100644 --- a/crates/common/trie/rlp.rs +++ b/crates/common/trie/rlp.rs @@ -31,6 +31,28 @@ impl RLPEncode for BranchNode { } <[u8] as RLPEncode>::encode(&self.value, buf); } + + // Duplicated to prealloc the buffer and avoid calculating the payload length twice + fn encode_to_vec(&self) -> Vec { + let value_len = <[u8] as RLPEncode>::length(&self.value); + let choices_len = self.choices.iter().fold(0, |acc, child| { + acc + RLPEncode::length(child.compute_hash_ref()) + }); + let payload_len = choices_len + value_len; + + let mut buf: Vec = Vec::with_capacity(payload_len + 3); // 3 byte prefix headroom + + encode_length(payload_len, &mut buf); + for child in self.choices.iter() { + match child.compute_hash_ref() { + NodeHash::Hashed(hash) => hash.0.encode(&mut buf), + NodeHash::Inline((_, 0)) => buf.push(RLP_NULL), + NodeHash::Inline((encoded, len)) => buf.extend_from_slice(&encoded[..*len as usize]), + } + } + + buf + } } impl RLPEncode for ExtensionNode { From 68f9875d12da00a20ab3c671084957c4ce19d4a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Thu, 30 Oct 2025 11:36:51 -0300 Subject: [PATCH 48/50] fix --- crates/common/trie/rlp.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/common/trie/rlp.rs b/crates/common/trie/rlp.rs index 80d8afd821c..c8bf5d1953d 100644 --- a/crates/common/trie/rlp.rs +++ b/crates/common/trie/rlp.rs @@ -50,6 +50,7 @@ impl RLPEncode for BranchNode { NodeHash::Inline((encoded, len)) => buf.extend_from_slice(&encoded[..*len as usize]), } } + <[u8] as RLPEncode>::encode(&self.value, &mut buf); buf } From 27209549a8573b5e8e2e6a48b4cb411fd4f005e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Est=C3=A9fano=20Bargas?= Date: Thu, 30 Oct 2025 12:09:08 -0300 Subject: [PATCH 49/50] fmt --- crates/common/trie/rlp.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/common/trie/rlp.rs b/crates/common/trie/rlp.rs index c8bf5d1953d..b04f5818f18 100644 --- a/crates/common/trie/rlp.rs +++ b/crates/common/trie/rlp.rs @@ -47,7 +47,9 @@ impl RLPEncode for BranchNode { match child.compute_hash_ref() { NodeHash::Hashed(hash) => hash.0.encode(&mut buf), NodeHash::Inline((_, 0)) => buf.push(RLP_NULL), - NodeHash::Inline((encoded, len)) => buf.extend_from_slice(&encoded[..*len as usize]), + NodeHash::Inline((encoded, len)) => { + buf.extend_from_slice(&encoded[..*len as usize]) + } } } <[u8] as RLPEncode>::encode(&self.value, &mut buf); From 8bccc0cd398a6fd218edd9cc2509b603af10fca6 Mon Sep 17 00:00:00 2001 From: Ivan Litteri <67517699+ilitteri@users.noreply.github.com> Date: Mon, 10 Nov 2025 16:41:54 -0300 Subject: [PATCH 50/50] Update crates/common/trie/rlp.rs Co-authored-by: Mario Rugiero --- crates/common/trie/rlp.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/common/trie/rlp.rs b/crates/common/trie/rlp.rs index b04f5818f18..1b4950572c2 100644 --- a/crates/common/trie/rlp.rs +++ b/crates/common/trie/rlp.rs @@ -16,10 +16,9 @@ use crate::{Nibbles, NodeHash}; impl RLPEncode for BranchNode { fn encode(&self, buf: &mut dyn bytes::BufMut) { let value_len = <[u8] as RLPEncode>::length(&self.value); - let choices_len = self.choices.iter().fold(0, |acc, child| { + let payload_len = self.choices.iter().fold(value_len, |acc, child| { acc + RLPEncode::length(child.compute_hash_ref()) }); - let payload_len = choices_len + value_len; encode_length(payload_len, buf); for child in self.choices.iter() {