diff --git a/Cargo.lock b/Cargo.lock index f999d987754b1..d3b19353c3bee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,7 +179,7 @@ dependencies = [ "const-hex", "derive_more 1.0.0", "foldhash", - "hashbrown 0.15.2", + "hashbrown 0.15.3", "hex-literal", "indexmap 2.9.0", "itoa", @@ -506,7 +506,7 @@ dependencies = [ "ark-std 0.5.0", "educe", "fnv", - "hashbrown 0.15.2", + "hashbrown 0.15.3", "itertools 0.13.0", "num-bigint", "num-integer", @@ -740,7 +740,7 @@ dependencies = [ "ark-std 0.5.0", "educe", "fnv", - "hashbrown 0.15.2", + "hashbrown 0.15.3", ] [[package]] @@ -4523,6 +4523,7 @@ dependencies = [ "frame-support", "frame-system", "futures", + "hashbrown 0.15.3", "hex-literal", "impl-trait-for-tuples", "log", @@ -7541,11 +7542,12 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" dependencies = [ "allocator-api2", + "equivalent", "foldhash", "serde", ] @@ -8287,7 +8289,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - "hashbrown 0.15.2", + "hashbrown 0.15.3", "serde", ] diff --git a/Cargo.toml b/Cargo.toml index 479320f4b68c4..b37bd3e7a3431 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -834,6 +834,7 @@ gum-proc-macro = { path = "polkadot/node/gum/proc-macro", default-features = fal handlebars = { version = "5.1.0" } hash-db = { version = "0.16.0", default-features = false } hash256-std-hasher = { version = "0.15.2", default-features = false } +hashbrown = "0.15.3" hex = { version = "0.4.3", default-features = false } hex-literal = { version = "0.4.1", default-features = false } hkdf = { version = "0.12.0" } diff --git a/cumulus/pallets/parachain-system/Cargo.toml b/cumulus/pallets/parachain-system/Cargo.toml index 9752abe2914ea..fa8a0c11af921 100644 --- a/cumulus/pallets/parachain-system/Cargo.toml +++ b/cumulus/pallets/parachain-system/Cargo.toml @@ -15,6 +15,7 @@ workspace = true bytes = { workspace = true } codec = { features = ["derive"], workspace = true } environmental = { workspace = true } +hashbrown = { workspace = true } impl-trait-for-tuples = { workspace = true } log = { workspace = true } scale-info = { features = ["derive"], workspace = true } diff --git a/cumulus/pallets/parachain-system/src/validate_block/trie_cache.rs b/cumulus/pallets/parachain-system/src/validate_block/trie_cache.rs index fde3bb1ddc294..0931a2f0c4935 100644 --- a/cumulus/pallets/parachain-system/src/validate_block/trie_cache.rs +++ b/cumulus/pallets/parachain-system/src/validate_block/trie_cache.rs @@ -15,11 +15,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -use alloc::{ - boxed::Box, - collections::btree_map::{BTreeMap, Entry}, -}; +use alloc::boxed::Box; + use core::cell::{RefCell, RefMut}; +use hashbrown::{hash_map::Entry, HashMap}; use sp_state_machine::TrieCacheProvider; use sp_trie::NodeCodec; use trie_db::{node::NodeOwned, Hasher}; @@ -28,8 +27,8 @@ use trie_db::{node::NodeOwned, Hasher}; /// of values. To be used in `validate_block` to serve values and nodes that /// have already been loaded and decoded from the storage proof. pub struct TrieCache<'a, H: Hasher> { - node_cache: RefMut<'a, BTreeMap>>, - value_cache: Option, trie_db::CachedValue>>>, + node_cache: RefMut<'a, HashMap>>, + value_cache: Option, trie_db::CachedValue>>>, } impl<'a, H: Hasher> trie_db::TrieCache> for TrieCache<'a, H> { @@ -66,14 +65,14 @@ impl<'a, H: Hasher> trie_db::TrieCache> for TrieCache<'a, H> { /// Provider of [`TrieCache`] instances. pub struct CacheProvider { - node_cache: RefCell>>, + node_cache: RefCell>>, /// Cache: `storage_root` => `storage_key` => `value`. /// /// One `block` can for example use multiple tries (child tries) and we need to distinguish the /// cached (`storage_key`, `value`) between them. For this we are using the `storage_root` to /// distinguish them (even if the storage root is the same for two child tries, it just means /// that both are exactly the same trie and there would happen no collision). - value_cache: RefCell, trie_db::CachedValue>>>, + value_cache: RefCell, trie_db::CachedValue>>>, } impl CacheProvider { diff --git a/cumulus/pallets/parachain-system/src/validate_block/trie_recorder.rs b/cumulus/pallets/parachain-system/src/validate_block/trie_recorder.rs index e8e400cee230e..dc70aae10163c 100644 --- a/cumulus/pallets/parachain-system/src/validate_block/trie_recorder.rs +++ b/cumulus/pallets/parachain-system/src/validate_block/trie_recorder.rs @@ -22,11 +22,10 @@ use codec::Encode; -use alloc::{ - collections::{btree_map::BTreeMap, btree_set::BTreeSet}, - rc::Rc, -}; +use alloc::rc::Rc; + use core::cell::{RefCell, RefMut}; +use hashbrown::{HashMap, HashSet}; use sp_trie::{NodeCodec, ProofSizeProvider, StorageProof}; use trie_db::{Hasher, RecordedForKey, TrieAccess}; @@ -35,9 +34,9 @@ use trie_db::{Hasher, RecordedForKey, TrieAccess}; /// The internal size counting logic should align /// with ['sp_trie::recorder::Recorder']. pub struct SizeOnlyRecorder<'a, H: Hasher> { - seen_nodes: RefMut<'a, BTreeSet>, + seen_nodes: RefMut<'a, HashSet>, encoded_size: RefMut<'a, usize>, - recorded_keys: RefMut<'a, BTreeMap, RecordedForKey>>, + recorded_keys: RefMut<'a, HashMap, RecordedForKey>>, } impl<'a, H: trie_db::Hasher> trie_db::TrieRecorder for SizeOnlyRecorder<'a, H> { @@ -91,9 +90,9 @@ impl<'a, H: trie_db::Hasher> trie_db::TrieRecorder for SizeOnlyRecorder< #[derive(Clone)] pub struct SizeOnlyRecorderProvider { - seen_nodes: Rc>>, + seen_nodes: Rc>>, encoded_size: Rc>, - recorded_keys: Rc, RecordedForKey>>>, + recorded_keys: Rc, RecordedForKey>>>, } impl Default for SizeOnlyRecorderProvider { diff --git a/prdoc/pr_8606.prdoc b/prdoc/pr_8606.prdoc new file mode 100644 index 0000000000000..493cad93dcc7b --- /dev/null +++ b/prdoc/pr_8606.prdoc @@ -0,0 +1,10 @@ +title: Use hashbrown hashmap/hashset in validation context +doc: +- audience: Node Dev + description: |- + Discovered while profiling https://github.com/paritytech/polkadot-sdk/issues/6131#issuecomment-2891523233 with the benchmark https://github.com/paritytech/polkadot-sdk/pull/8069 that when running in validation a big chunk of the time is spent inserting and retrieving data from the BTreeMap/BTreeSet. + + By switching to hashbrown HashMap/HashSet in validation TrieCache and TrieRecorder and the memory-db https://github.com/paritytech/trie/pull/221 read costs improve with around ~40% and write with about ~20% +crates: +- name: cumulus-pallet-parachain-system + bump: minor