-
Notifications
You must be signed in to change notification settings - Fork 2.2k
fix(engine): flatten storage cache #18880
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
60f4db1
415c3b7
ea9affc
0f83128
a52d06d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,8 @@ | ||
| //! Execution cache implementation for block processing. | ||
| use alloy_primitives::{Address, StorageKey, StorageValue, B256}; | ||
| use alloy_primitives::{ | ||
| map::{DefaultHashBuilder, HashSet}, | ||
| Address, StorageKey, StorageValue, B256, | ||
| }; | ||
| use metrics::Gauge; | ||
| use mini_moka::sync::CacheBuilder; | ||
| use reth_errors::ProviderResult; | ||
|
|
@@ -14,7 +17,6 @@ use reth_trie::{ | |
| updates::TrieUpdates, AccountProof, HashedPostState, HashedStorage, MultiProof, | ||
| MultiProofTargets, StorageMultiProof, StorageProof, TrieInput, | ||
| }; | ||
| use revm_primitives::map::DefaultHashBuilder; | ||
| use std::{sync::Arc, time::Duration}; | ||
| use tracing::trace; | ||
|
|
||
|
|
@@ -300,65 +302,65 @@ pub(crate) struct ExecutionCache { | |
| /// Cache for contract bytecode, keyed by code hash. | ||
| code_cache: Cache<B256, Option<Bytecode>>, | ||
|
|
||
| /// Per-account storage cache: outer cache keyed by Address, inner cache tracks that account’s | ||
| /// storage slots. | ||
| storage_cache: Cache<Address, AccountStorageCache>, | ||
| /// Flattened storage cache: composite key of (`Address`, `StorageKey`) maps directly to values. | ||
| storage_cache: Cache<(Address, StorageKey), Option<StorageValue>>, | ||
|
|
||
| /// Cache for basic account information (nonce, balance, code hash). | ||
| account_cache: Cache<Address, Option<Account>>, | ||
| } | ||
|
|
||
| impl ExecutionCache { | ||
| /// Get storage value from hierarchical cache. | ||
| /// Get storage value from flattened cache. | ||
| /// | ||
| /// Returns a `SlotStatus` indicating whether: | ||
| /// - `NotCached`: The account's storage cache doesn't exist | ||
| /// - `Empty`: The slot exists in the account's cache but is empty | ||
| /// - `NotCached`: The storage slot is not in the cache | ||
| /// - `Empty`: The slot exists in the cache but is empty | ||
| /// - `Value`: The slot exists and has a specific value | ||
| pub(crate) fn get_storage(&self, address: &Address, key: &StorageKey) -> SlotStatus { | ||
| match self.storage_cache.get(address) { | ||
| match self.storage_cache.get(&(*address, *key)) { | ||
| None => SlotStatus::NotCached, | ||
| Some(account_cache) => account_cache.get_storage(key), | ||
| Some(None) => SlotStatus::Empty, | ||
| Some(Some(value)) => SlotStatus::Value(value), | ||
| } | ||
| } | ||
|
|
||
| /// Insert storage value into hierarchical cache | ||
| /// Insert storage value into flattened cache | ||
| pub(crate) fn insert_storage( | ||
| &self, | ||
| address: Address, | ||
| key: StorageKey, | ||
| value: Option<StorageValue>, | ||
| ) { | ||
| self.insert_storage_bulk(address, [(key, value)]); | ||
| self.storage_cache.insert((address, key), value); | ||
| } | ||
|
|
||
| /// Insert multiple storage values into hierarchical cache for a single account | ||
| /// Insert multiple storage values into flattened cache for a single account | ||
| /// | ||
| /// This method is optimized for inserting multiple storage values for the same address | ||
| /// by doing the account cache lookup only once instead of for each key-value pair. | ||
| /// This method inserts multiple storage values for the same address directly | ||
| /// into the flattened cache. | ||
| pub(crate) fn insert_storage_bulk<I>(&self, address: Address, storage_entries: I) | ||
| where | ||
| I: IntoIterator<Item = (StorageKey, Option<StorageValue>)>, | ||
| { | ||
| let account_cache = self.storage_cache.get(&address).unwrap_or_else(|| { | ||
| let account_cache = AccountStorageCache::default(); | ||
| self.storage_cache.insert(address, account_cache.clone()); | ||
| account_cache | ||
| }); | ||
|
|
||
| for (key, value) in storage_entries { | ||
| account_cache.insert_storage(key, value); | ||
| self.storage_cache.insert((address, key), value); | ||
| } | ||
| } | ||
|
|
||
| /// Invalidate storage for specific account | ||
| pub(crate) fn invalidate_account_storage(&self, address: &Address) { | ||
| self.storage_cache.invalidate(address); | ||
| } | ||
|
|
||
| /// Returns the total number of storage slots cached across all accounts | ||
| pub(crate) fn total_storage_slots(&self) -> usize { | ||
| self.storage_cache.iter().map(|addr| addr.len()).sum() | ||
| self.storage_cache.entry_count() as usize | ||
| } | ||
|
|
||
| /// Invalidates the storage for all addresses in the set | ||
| pub(crate) fn invalidate_storages(&self, addresses: HashSet<&Address>) { | ||
| let storage_entries = self | ||
| .storage_cache | ||
| .iter() | ||
| .filter_map(|entry| addresses.contains(&entry.key().0).then_some(*entry.key())); | ||
| for key in storage_entries { | ||
| self.storage_cache.invalidate(&key) | ||
| } | ||
|
Comment on lines 360 to 367
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. hmm, unsure this is sound, because this feels problematic: dashmap iter
and cache iter —
tho I don't know if this is applicable here; should we perhaps collect then remove?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. hmm, yeah I think we should collect then remove here
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. this now collects
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. we should document this with a one-liner, because this is a prime candidate for a bot "optimization" PR that would look perfectly valid |
||
| } | ||
|
|
||
| /// Inserts the post-execution state changes into the cache. | ||
|
|
@@ -385,6 +387,7 @@ impl ExecutionCache { | |
| self.code_cache.insert(*code_hash, Some(Bytecode(bytecode.clone()))); | ||
| } | ||
|
|
||
| let mut invalidated_accounts = HashSet::default(); | ||
| for (addr, account) in &state_updates.state { | ||
| // If the account was not modified, as in not changed and not destroyed, then we have | ||
| // nothing to do w.r.t. this particular account and can move on | ||
|
|
@@ -397,7 +400,7 @@ impl ExecutionCache { | |
| // Invalidate the account cache entry if destroyed | ||
| self.account_cache.invalidate(addr); | ||
|
|
||
| self.invalidate_account_storage(addr); | ||
| invalidated_accounts.insert(addr); | ||
| continue | ||
| } | ||
|
|
||
|
|
@@ -424,6 +427,9 @@ impl ExecutionCache { | |
| self.account_cache.insert(*addr, Some(Account::from(account_info))); | ||
| } | ||
|
|
||
| // invalidate storage for all destroyed accounts | ||
| self.invalidate_storages(invalidated_accounts); | ||
|
|
||
| Ok(()) | ||
| } | ||
| } | ||
|
|
@@ -452,11 +458,11 @@ impl ExecutionCacheBuilder { | |
| const TIME_TO_IDLE: Duration = Duration::from_secs(3600); // 1 hour | ||
|
|
||
| let storage_cache = CacheBuilder::new(self.storage_cache_entries) | ||
| .weigher(|_key: &Address, value: &AccountStorageCache| -> u32 { | ||
| // values based on results from measure_storage_cache_overhead test | ||
| let base_weight = 39_000; | ||
| let slots_weight = value.len() * 218; | ||
| (base_weight + slots_weight) as u32 | ||
| .weigher(|_key: &(Address, StorageKey), _value: &Option<StorageValue>| -> u32 { | ||
| // Size of composite key (Address + StorageKey) + Option<StorageValue> | ||
| // Address: 20 bytes, StorageKey: 32 bytes, Option<StorageValue>: 33 bytes | ||
| // Plus some overhead for the hash map entry | ||
| 120_u32 | ||
| }) | ||
| .max_capacity(storage_cache_size) | ||
| .time_to_live(EXPIRY_TIME) | ||
|
|
@@ -573,56 +579,6 @@ impl SavedCache { | |
| } | ||
| } | ||
|
|
||
| /// Cache for an individual account's storage slots. | ||
| /// | ||
| /// This represents the second level of the hierarchical storage cache. | ||
| /// Each account gets its own `AccountStorageCache` to store accessed storage slots. | ||
| #[derive(Debug, Clone)] | ||
| pub(crate) struct AccountStorageCache { | ||
| /// Map of storage keys to their cached values. | ||
| slots: Cache<StorageKey, Option<StorageValue>>, | ||
| } | ||
|
|
||
| impl AccountStorageCache { | ||
| /// Create a new [`AccountStorageCache`] | ||
| pub(crate) fn new(max_slots: u64) -> Self { | ||
| Self { | ||
| slots: CacheBuilder::new(max_slots).build_with_hasher(DefaultHashBuilder::default()), | ||
| } | ||
| } | ||
|
|
||
| /// Get a storage value from this account's cache. | ||
| /// - `NotCached`: The slot is not in the cache | ||
| /// - `Empty`: The slot is empty | ||
| /// - `Value`: The slot has a specific value | ||
| pub(crate) fn get_storage(&self, key: &StorageKey) -> SlotStatus { | ||
| match self.slots.get(key) { | ||
| None => SlotStatus::NotCached, | ||
| Some(None) => SlotStatus::Empty, | ||
| Some(Some(value)) => SlotStatus::Value(value), | ||
| } | ||
| } | ||
|
|
||
| /// Insert a storage value | ||
| pub(crate) fn insert_storage(&self, key: StorageKey, value: Option<StorageValue>) { | ||
| self.slots.insert(key, value); | ||
| } | ||
|
|
||
| /// Returns the number of slots in the cache | ||
| pub(crate) fn len(&self) -> usize { | ||
| self.slots.entry_count() as usize | ||
| } | ||
| } | ||
|
|
||
| impl Default for AccountStorageCache { | ||
| fn default() -> Self { | ||
| // With weigher and max_capacity in place, this number represents | ||
| // the maximum number of entries that can be stored, not the actual | ||
| // memory usage which is controlled by storage cache's max_capacity. | ||
| Self::new(1_000_000) | ||
| } | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use super::*; | ||
|
|
@@ -697,32 +653,36 @@ mod tests { | |
|
|
||
| #[test] | ||
| fn measure_storage_cache_overhead() { | ||
| let (base_overhead, cache) = measure_allocation(|| AccountStorageCache::new(1000)); | ||
| println!("Base AccountStorageCache overhead: {base_overhead} bytes"); | ||
| let (base_overhead, cache) = | ||
| measure_allocation(|| ExecutionCacheBuilder::default().build_caches(1000)); | ||
| println!("Base ExecutionCache overhead: {base_overhead} bytes"); | ||
| let mut rng = rand::rng(); | ||
|
|
||
| let address = Address::random(); | ||
| let key = StorageKey::random(); | ||
| let value = StorageValue::from(rng.random::<u128>()); | ||
| let (first_slot, _) = measure_allocation(|| { | ||
| cache.insert_storage(key, Some(value)); | ||
| cache.insert_storage(address, key, Some(value)); | ||
| }); | ||
| println!("First slot insertion overhead: {first_slot} bytes"); | ||
|
|
||
| const TOTAL_SLOTS: usize = 10_000; | ||
| let (test_slots, _) = measure_allocation(|| { | ||
| for _ in 0..TOTAL_SLOTS { | ||
| let addr = Address::random(); | ||
| let key = StorageKey::random(); | ||
| let value = StorageValue::from(rng.random::<u128>()); | ||
| cache.insert_storage(key, Some(value)); | ||
| cache.insert_storage(addr, key, Some(value)); | ||
| } | ||
| }); | ||
| println!("Average overhead over {} slots: {} bytes", TOTAL_SLOTS, test_slots / TOTAL_SLOTS); | ||
|
|
||
| println!("\nTheoretical sizes:"); | ||
| println!("Address size: {} bytes", size_of::<Address>()); | ||
| println!("StorageKey size: {} bytes", size_of::<StorageKey>()); | ||
| println!("StorageValue size: {} bytes", size_of::<StorageValue>()); | ||
| println!("Option<StorageValue> size: {} bytes", size_of::<Option<StorageValue>>()); | ||
| println!("Option<B256> size: {} bytes", size_of::<Option<B256>>()); | ||
| println!("(Address, StorageKey) size: {} bytes", size_of::<(Address, StorageKey)>()); | ||
| } | ||
|
|
||
| #[test] | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
right, we can't really nest caches, because this would mean that the weigher of the storage_cache would only consider new account -> storage entries
this is better because it treats storage slots as individual entries, which I guess explains the higher hit rate
we likely have additional address overhead, but this should be okay, we could compress this to a u64 with a separate map, like we do in the pool, but perhaps not that useful