Merged · Changes from 3 commits
138 changes: 49 additions & 89 deletions crates/engine/tree/src/tree/cached_state.rs
@@ -1,5 +1,8 @@
//! Execution cache implementation for block processing.
use alloy_primitives::{Address, StorageKey, StorageValue, B256};
use alloy_primitives::{
map::{DefaultHashBuilder, HashSet},
Address, StorageKey, StorageValue, B256,
};
use metrics::Gauge;
use mini_moka::sync::CacheBuilder;
use reth_errors::ProviderResult;
@@ -14,7 +17,6 @@ use reth_trie::{
updates::TrieUpdates, AccountProof, HashedPostState, HashedStorage, MultiProof,
MultiProofTargets, StorageMultiProof, StorageProof, TrieInput,
};
use revm_primitives::map::DefaultHashBuilder;
use std::{sync::Arc, time::Duration};
use tracing::trace;

@@ -300,65 +302,65 @@ pub(crate) struct ExecutionCache {
/// Cache for contract bytecode, keyed by code hash.
code_cache: Cache<B256, Option<Bytecode>>,

/// Per-account storage cache: outer cache keyed by Address, inner cache tracks that account’s
/// storage slots.
storage_cache: Cache<Address, AccountStorageCache>,
/// Flattened storage cache: composite key of (`Address`, `StorageKey`) maps directly to values.
storage_cache: Cache<(Address, StorageKey), Option<StorageValue>>,
Collaborator:

right, we can't really nest caches because this would mean that the weigher of the storage_cache would only consider new acc -> storage entries

this is better because it treats storage slots as individual entries, which I guess explains the higher hit rate

we likely have additional address overhead, but this should be okay, we could compress this to a u64 with a separate map, like we do in the pool, but perhaps not that useful
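A minimal sketch of the u64 compression idea floated above; it is not part of this PR, and `AddressInterner` is an invented name. Each address is interned to a compact id once, so the cache key becomes `(u64, StorageKey)`, trading an extra map lookup for 12 fewer key bytes per cached slot:

```rust
use alloy_primitives::Address;
use std::collections::HashMap;

/// Hypothetical interner assigning each distinct `Address` a compact u64 id.
#[derive(Default)]
struct AddressInterner {
    ids: HashMap<Address, u64>,
    next_id: u64,
}

impl AddressInterner {
    /// Returns the existing id for `addr`, or assigns the next free one.
    fn intern(&mut self, addr: Address) -> u64 {
        let next_id = &mut self.next_id;
        *self.ids.entry(addr).or_insert_with(|| {
            let id = *next_id;
            *next_id += 1;
            id
        })
    }
}
```

The storage cache would then be keyed by `(u64, StorageKey)` instead of `(Address, StorageKey)`, shrinking each key from 52 to 40 bytes.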


/// Cache for basic account information (nonce, balance, code hash).
account_cache: Cache<Address, Option<Account>>,
}

impl ExecutionCache {
/// Get storage value from hierarchical cache.
/// Get storage value from flattened cache.
///
/// Returns a `SlotStatus` indicating whether:
/// - `NotCached`: The account's storage cache doesn't exist
/// - `Empty`: The slot exists in the account's cache but is empty
/// - `NotCached`: The storage slot is not in the cache
/// - `Empty`: The slot exists in the cache but is empty
/// - `Value`: The slot exists and has a specific value
pub(crate) fn get_storage(&self, address: &Address, key: &StorageKey) -> SlotStatus {
match self.storage_cache.get(address) {
match self.storage_cache.get(&(*address, *key)) {
None => SlotStatus::NotCached,
Some(account_cache) => account_cache.get_storage(key),
Some(None) => SlotStatus::Empty,
Some(Some(value)) => SlotStatus::Value(value),
}
}
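For context, a hedged sketch of how a caller could consume the three `SlotStatus` variants; `fetch_slot_from_db` is a stand-in helper rather than the provider's real API, and the other types are the ones from this module:

```rust
fn cached_storage_lookup(
    cache: &ExecutionCache,
    address: Address,
    key: StorageKey,
) -> ProviderResult<Option<StorageValue>> {
    match cache.get_storage(&address, &key) {
        // Cache hit with a concrete value.
        SlotStatus::Value(value) => Ok(Some(value)),
        // Cache hit, but the slot is known to be empty.
        SlotStatus::Empty => Ok(None),
        // Cache miss: fall back to the database and backfill the cache.
        SlotStatus::NotCached => {
            let value = fetch_slot_from_db(address, key)?; // assumed helper
            cache.insert_storage(address, key, value);
            Ok(value)
        }
    }
}
```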

/// Insert storage value into hierarchical cache
/// Insert storage value into flattened cache
pub(crate) fn insert_storage(
&self,
address: Address,
key: StorageKey,
value: Option<StorageValue>,
) {
self.insert_storage_bulk(address, [(key, value)]);
self.storage_cache.insert((address, key), value);
}

/// Insert multiple storage values into hierarchical cache for a single account
/// Insert multiple storage values into flattened cache for a single account
///
/// This method is optimized for inserting multiple storage values for the same address
/// by doing the account cache lookup only once instead of for each key-value pair.
/// This method inserts multiple storage values for the same address directly
/// into the flattened cache.
pub(crate) fn insert_storage_bulk<I>(&self, address: Address, storage_entries: I)
where
I: IntoIterator<Item = (StorageKey, Option<StorageValue>)>,
{
let account_cache = self.storage_cache.get(&address).unwrap_or_else(|| {
let account_cache = AccountStorageCache::default();
self.storage_cache.insert(address, account_cache.clone());
account_cache
});

for (key, value) in storage_entries {
account_cache.insert_storage(key, value);
self.storage_cache.insert((address, key), value);
}
}
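A small usage sketch, with invented slot keys and values and `cache` assumed to be an `ExecutionCache`:

```rust
use alloy_primitives::{Address, U256};

let slots = vec![
    (StorageKey::from(U256::from(0)), Some(StorageValue::from(42u64))),
    (StorageKey::from(U256::from(1)), None), // slot was read but is empty
];
cache.insert_storage_bulk(Address::random(), slots);
```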

/// Invalidate storage for specific account
pub(crate) fn invalidate_account_storage(&self, address: &Address) {
self.storage_cache.invalidate(address);
}

/// Returns the total number of storage slots cached across all accounts
pub(crate) fn total_storage_slots(&self) -> usize {
self.storage_cache.iter().map(|addr| addr.len()).sum()
self.storage_cache.entry_count() as usize
}

/// Invalidates the storage for all addresses in the set
pub(crate) fn invalidate_storages(&self, addresses: HashSet<&Address>) {
// Collect matching keys first: the cache iterator holds DashMap read locks,
// so invalidating while still iterating can deadlock. Do not "optimize" this
// back into a single streaming pass.
let storage_entries: Vec<_> = self
.storage_cache
.iter()
.filter_map(|entry| addresses.contains(&entry.key().0).then_some(*entry.key()))
.collect();
for key in storage_entries {
self.storage_cache.invalidate(&key)
}
Comment on lines 360 to 367
Collaborator:

hmm, unsure this is sound because this feels problematic

The dashmap iter docs say: "Locking behaviour: May deadlock if called when holding a mutable reference into the map."

And the cache iter docs: "Locking behavior: This iterator relies on the iterator of dashmap::DashMap, which employs read-write locks. May deadlock if the thread holding an iterator attempts to update the cache."

though I don't know if this is applicable here, and unfortunately we don't have a retain here

should perhaps be collect, then remove?

Member Author:

hmm, yeah I think we should collect then remove here

Member Author:

this now collects

Collaborator:

we should document this with a one-liner, because this is a prime candidate for a bot "optimization" PR that would look perfectly valid

}

/// Inserts the post-execution state changes into the cache.
@@ -385,6 +387,7 @@ impl ExecutionCache {
self.code_cache.insert(*code_hash, Some(Bytecode(bytecode.clone())));
}

let mut invalidated_accounts = HashSet::default();
for (addr, account) in &state_updates.state {
// If the account was not modified, as in not changed and not destroyed, then we have
// nothing to do w.r.t. this particular account and can move on
@@ -397,7 +400,7 @@
// Invalidate the account cache entry if destroyed
self.account_cache.invalidate(addr);

self.invalidate_account_storage(addr);
invalidated_accounts.insert(addr);
continue
}

@@ -424,6 +427,9 @@
self.account_cache.insert(*addr, Some(Account::from(account_info)));
}

// Invalidate storage for all destroyed accounts
self.invalidate_storages(invalidated_accounts);

Ok(())
}
}
@@ -452,11 +458,11 @@ impl ExecutionCacheBuilder {
const TIME_TO_IDLE: Duration = Duration::from_secs(3600); // 1 hour

let storage_cache = CacheBuilder::new(self.storage_cache_entries)
.weigher(|_key: &Address, value: &AccountStorageCache| -> u32 {
// values based on results from measure_storage_cache_overhead test
let base_weight = 39_000;
let slots_weight = value.len() * 218;
(base_weight + slots_weight) as u32
.weigher(|_key: &(Address, StorageKey), _value: &Option<StorageValue>| -> u32 {
// Size of composite key (Address + StorageKey) + Option<StorageValue>
// Address: 20 bytes, StorageKey: 32 bytes, Option<StorageValue>: 33 bytes
// Plus some overhead for the hash map entry
120_u32
})
.max_capacity(storage_cache_size)
.time_to_live(EXPIRY_TIME)
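Back-of-envelope capacity math under the flat weight: the 1 GiB budget below is an assumed figure, since the actual `storage_cache_size` value is not visible in this hunk:

```rust
// With every entry weighed at a flat 120 bytes, a byte-denominated
// max_capacity bounds the number of cached slots directly.
let max_capacity_bytes: u64 = 1 << 30; // assumed 1 GiB budget
let weight_per_slot: u64 = 120;
let max_slots = max_capacity_bytes / weight_per_slot;
assert_eq!(max_slots, 8_947_848); // roughly 8.9M slots before eviction
```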
@@ -573,56 +579,6 @@ impl SavedCache {
}
}

/// Cache for an individual account's storage slots.
///
/// This represents the second level of the hierarchical storage cache.
/// Each account gets its own `AccountStorageCache` to store accessed storage slots.
#[derive(Debug, Clone)]
pub(crate) struct AccountStorageCache {
/// Map of storage keys to their cached values.
slots: Cache<StorageKey, Option<StorageValue>>,
}

impl AccountStorageCache {
/// Create a new [`AccountStorageCache`]
pub(crate) fn new(max_slots: u64) -> Self {
Self {
slots: CacheBuilder::new(max_slots).build_with_hasher(DefaultHashBuilder::default()),
}
}

/// Get a storage value from this account's cache.
/// - `NotCached`: The slot is not in the cache
/// - `Empty`: The slot is empty
/// - `Value`: The slot has a specific value
pub(crate) fn get_storage(&self, key: &StorageKey) -> SlotStatus {
match self.slots.get(key) {
None => SlotStatus::NotCached,
Some(None) => SlotStatus::Empty,
Some(Some(value)) => SlotStatus::Value(value),
}
}

/// Insert a storage value
pub(crate) fn insert_storage(&self, key: StorageKey, value: Option<StorageValue>) {
self.slots.insert(key, value);
}

/// Returns the number of slots in the cache
pub(crate) fn len(&self) -> usize {
self.slots.entry_count() as usize
}
}

impl Default for AccountStorageCache {
fn default() -> Self {
// With weigher and max_capacity in place, this number represents
// the maximum number of entries that can be stored, not the actual
// memory usage which is controlled by storage cache's max_capacity.
Self::new(1_000_000)
}
}

#[cfg(test)]
mod tests {
use super::*;
@@ -697,32 +653,36 @@ mod tests {

#[test]
fn measure_storage_cache_overhead() {
let (base_overhead, cache) = measure_allocation(|| AccountStorageCache::new(1000));
println!("Base AccountStorageCache overhead: {base_overhead} bytes");
let (base_overhead, cache) =
measure_allocation(|| ExecutionCacheBuilder::default().build_caches(1000));
println!("Base ExecutionCache overhead: {base_overhead} bytes");
let mut rng = rand::rng();

let address = Address::random();
let key = StorageKey::random();
let value = StorageValue::from(rng.random::<u128>());
let (first_slot, _) = measure_allocation(|| {
cache.insert_storage(key, Some(value));
cache.insert_storage(address, key, Some(value));
});
println!("First slot insertion overhead: {first_slot} bytes");

const TOTAL_SLOTS: usize = 10_000;
let (test_slots, _) = measure_allocation(|| {
for _ in 0..TOTAL_SLOTS {
let addr = Address::random();
let key = StorageKey::random();
let value = StorageValue::from(rng.random::<u128>());
cache.insert_storage(key, Some(value));
cache.insert_storage(addr, key, Some(value));
}
});
println!("Average overhead over {} slots: {} bytes", TOTAL_SLOTS, test_slots / TOTAL_SLOTS);

println!("\nTheoretical sizes:");
println!("Address size: {} bytes", size_of::<Address>());
println!("StorageKey size: {} bytes", size_of::<StorageKey>());
println!("StorageValue size: {} bytes", size_of::<StorageValue>());
println!("Option<StorageValue> size: {} bytes", size_of::<Option<StorageValue>>());
println!("Option<B256> size: {} bytes", size_of::<Option<B256>>());
println!("(Address, StorageKey) size: {} bytes", size_of::<(Address, StorageKey)>());
}
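The `measure_allocation` helper is defined elsewhere in this test module; the sketch below shows what such a helper typically looks like, assuming a counting global allocator. Names and details are illustrative, not the real implementation:

```rust
use std::alloc::{GlobalAlloc, Layout, System};
use std::sync::atomic::{AtomicUsize, Ordering};

/// Allocator wrapper that tracks net live allocation in bytes.
struct CountingAlloc;

static ALLOCATED: AtomicUsize = AtomicUsize::new(0);

unsafe impl GlobalAlloc for CountingAlloc {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        ALLOCATED.fetch_add(layout.size(), Ordering::Relaxed);
        System.alloc(layout)
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        ALLOCATED.fetch_sub(layout.size(), Ordering::Relaxed);
        System.dealloc(ptr, layout)
    }
}

#[global_allocator]
static GLOBAL: CountingAlloc = CountingAlloc;

/// Runs `f` and returns the net bytes allocated alongside its output.
fn measure_allocation<T>(f: impl FnOnce() -> T) -> (usize, T) {
    let before = ALLOCATED.load(Ordering::Relaxed);
    let output = f();
    let after = ALLOCATED.load(Ordering::Relaxed);
    (after.saturating_sub(before), output)
}
```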

#[test]