Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cumulus/polkadot-omni-node/lib/src/common/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,8 @@ where
let partial = T::new_partial(&config).map_err(sc_cli::Error::Service)?;
let db = partial.backend.expose_db();
let storage = partial.backend.expose_storage();
let shared_trie_cache = partial.backend.expose_shared_trie_cache();

cmd.run(config, partial.client, db, storage)
cmd.run(config, partial.client, db, storage, shared_trie_cache)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ fn benchmark_storage(db: &str, runtime: &str, base_path: &Path) -> ExitStatus {
.arg("--weight-path")
.arg(base_path)
.args(["--state-version", "0"])
.args(["--batch-size", "1"])
.args(["--warmups", "0"])
.args(["--add", "100", "--mul", "1.2", "--metric", "p75"])
.status()
Expand Down
3 changes: 2 additions & 1 deletion polkadot/cli/src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -386,8 +386,9 @@ pub fn run() -> Result<()> {
let (client, backend, _, _) = polkadot_service::new_chain_ops(&mut config)?;
let db = backend.expose_db();
let storage = backend.expose_storage();
let shared_trie_cache = backend.expose_shared_trie_cache();

cmd.run(config, client.clone(), db, storage).map_err(Error::SubstrateCli)
cmd.run(config, client.clone(), db, storage, shared_trie_cache).map_err(Error::SubstrateCli)
}),
BenchmarkCmd::Block(cmd) => runner.sync_run(|mut config| {
let (client, _, _, _) = polkadot_service::new_chain_ops(&mut config)?;
Expand Down
1 change: 1 addition & 0 deletions polkadot/tests/benchmark_storage_works.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ fn benchmark_storage(db: &str, base_path: &Path) -> ExitStatus {
.arg("--weight-path")
.arg(base_path)
.args(["--state-version", "0"])
.args(["--batch-size", "1"])
.args(["--warmups", "0"])
.args(["--add", "100", "--mul", "1.2", "--metric", "p75"])
.status()
Expand Down
23 changes: 23 additions & 0 deletions prdoc/pr_7867.prdoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
title: benchmark/storage Make read/write benchmarks more accurate

doc:
- audience: [Runtime Dev, Node Dev]
description: |-
Improve the accuracy of the read/write benchmarks by making sure that, for both
reads and writes, we compute the amortized cost of a single key operation: a batch
facility is added so that the cost of common operations like root computation
is spread across multiple keys. Additionally, a pov-recorder flag is added so that we
can replicate the same environment that parachains run in.

crates:
- name: sc-client-db
bump: major
- name: frame-benchmarking-cli
bump: major
- name: polkadot-cli
bump: major
- name: polkadot-omni-node-lib
bump: major
- name: polkadot
bump: patch

3 changes: 2 additions & 1 deletion substrate/bin/node/cli/src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,9 @@ pub fn run() -> Result<()> {
let partial = new_partial(&config, None)?;
let db = partial.backend.expose_db();
let storage = partial.backend.expose_storage();
let shared_trie_cache = partial.backend.expose_shared_trie_cache();

cmd.run(config, partial.client, db, storage)
cmd.run(config, partial.client, db, storage, shared_trie_cache)
},
BenchmarkCmd::Overhead(cmd) => {
// ensure that we keep the task manager alive
Expand Down
1 change: 1 addition & 0 deletions substrate/bin/node/cli/tests/benchmark_storage_works.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ fn benchmark_storage(db: &str, base_path: &Path) -> ExitStatus {
.arg("--weight-path")
.arg(base_path)
.args(["--state-version", "1"])
.args(["--batch-size", "1"])
.args(["--warmups", "0"])
.args(["--add", "100", "--mul", "1.2", "--metric", "p75"])
.arg("--include-child-trees")
Expand Down
10 changes: 10 additions & 0 deletions substrate/client/db/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1193,6 +1193,16 @@ impl<Block: BlockT> Backend<Block> {
self.storage.clone()
}

/// Expose the shared trie cache that is used by this backend.
///
/// Returns a clone of the backend's optional shared trie cache handle;
/// `None` when the backend was built without a trie cache.
///
/// Should only be needed for benchmarking.
#[cfg(feature = "runtime-benchmarks")]
pub fn expose_shared_trie_cache(
&self,
) -> Option<sp_trie::cache::SharedTrieCache<HashingFor<Block>>> {
self.shared_trie_cache.clone()
}

fn from_database(
db: Arc<dyn Database<DbHash>>,
canonicalization_delay: u64,
Expand Down
50 changes: 47 additions & 3 deletions substrate/utils/frame/benchmarking-cli/src/storage/cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use sc_cli::{CliConfiguration, DatabaseParams, PruningParams, Result, SharedPara
use sc_client_api::{Backend as ClientBackend, StorageProvider, UsageProvider};
use sc_client_db::DbHash;
use sc_service::Configuration;
use sp_api::CallApiAt;
use sp_blockchain::HeaderBackend;
use sp_database::{ColumnId, Database};
use sp_runtime::traits::{Block as BlockT, HashingFor};
Expand Down Expand Up @@ -116,6 +117,25 @@ pub struct StorageParams {
/// Include child trees in benchmark.
#[arg(long)]
pub include_child_trees: bool,

/// Disable the PoV recorder.
///
/// The recorder has an impact on performance when benchmarking with the trie cache enabled.
/// If the chain records a proof while building/importing a block, the PoV recorder
/// should be kept active.
///
/// Hence, when generating weights for a parachain the recorder should stay enabled, and when
/// generating weights for a standalone chain it should be disabled.
#[arg(long, default_value = "false")]
pub disable_pov_recorder: bool,

/// The batch size for the write benchmark.
///
/// Since the write size needs to also include the cost of computing the storage root, which is
/// done once at the end of the block, the batch size is used to simulate multiple writes in a
/// block.
#[arg(long, default_value_t = 100_000)]
pub batch_size: usize,
}

impl StorageCmd {
Expand All @@ -127,11 +147,15 @@ impl StorageCmd {
client: Arc<C>,
db: (Arc<dyn Database<DbHash>>, ColumnId),
storage: Arc<dyn Storage<HashingFor<Block>>>,
shared_trie_cache: Option<sp_trie::cache::SharedTrieCache<HashingFor<Block>>>,
) -> Result<()>
where
BA: ClientBackend<Block>,
Block: BlockT<Hash = DbHash>,
C: UsageProvider<Block> + StorageProvider<Block, BA> + HeaderBackend<Block>,
C: UsageProvider<Block>
+ StorageProvider<Block, BA>
+ HeaderBackend<Block>
+ CallApiAt<Block>,
{
let mut template = TemplateData::new(&cfg, &self.params)?;

Expand All @@ -140,7 +164,7 @@ impl StorageCmd {

if !self.params.skip_read {
self.bench_warmup(&client)?;
let record = self.bench_read(client.clone())?;
let record = self.bench_read(client.clone(), shared_trie_cache.clone())?;
if let Some(path) = &self.params.json_read_path {
record.save_json(&cfg, path, "read")?;
}
Expand All @@ -151,7 +175,7 @@ impl StorageCmd {

if !self.params.skip_write {
self.bench_warmup(&client)?;
let record = self.bench_write(client, db, storage)?;
let record = self.bench_write(client, db, storage, shared_trie_cache)?;
if let Some(path) = &self.params.json_write_path {
record.save_json(&cfg, path, "write")?;
}
Expand Down Expand Up @@ -197,11 +221,31 @@ impl StorageCmd {

for i in 0..self.params.warmups {
info!("Warmup round {}/{}", i + 1, self.params.warmups);
let mut child_nodes = Vec::new();

for key in keys.as_slice() {
let _ = client
.storage(hash, &key)
.expect("Checked above to exist")
.ok_or("Value unexpectedly empty");

if let Some(info) = self
.params
.include_child_trees
.then(|| self.is_child_key(key.clone().0))
.flatten()
{
// child tree key
for ck in client.child_storage_keys(hash, info.clone(), None, None)? {
child_nodes.push((ck.clone(), info.clone()));
}
}
}
for (key, info) in child_nodes.as_slice() {
let _ = client
.child_storage(hash, info, key)
.expect("Checked above to exist")
.ok_or("Value unexpectedly empty")?;
}
}

Expand Down
78 changes: 66 additions & 12 deletions substrate/utils/frame/benchmarking-cli/src/storage/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use sc_cli::Result;
use sc_client_api::{Backend as ClientBackend, StorageProvider, UsageProvider};
use sp_runtime::traits::{Block as BlockT, Header as HeaderT};

use log::info;
use rand::prelude::*;
use sc_cli::{Error, Result};
use sc_client_api::{Backend as ClientBackend, StorageProvider, UsageProvider};
use sp_api::CallApiAt;
use sp_runtime::traits::{Block as BlockT, HashingFor, Header as HeaderT};
use sp_state_machine::{backend::AsTrieBackend, Backend};
use std::{fmt::Debug, sync::Arc, time::Instant};

use super::cmd::StorageCmd;
Expand All @@ -29,9 +30,13 @@ use crate::shared::{new_rng, BenchRecord};
impl StorageCmd {
/// Benchmarks the time it takes to read a single Storage item.
/// Uses the latest state that is available for the given client.
pub(crate) fn bench_read<B, BA, C>(&self, client: Arc<C>) -> Result<BenchRecord>
pub(crate) fn bench_read<B, BA, C>(
&self,
client: Arc<C>,
_shared_trie_cache: Option<sp_trie::cache::SharedTrieCache<HashingFor<B>>>,
) -> Result<BenchRecord>
where
C: UsageProvider<B> + StorageProvider<B, BA>,
C: UsageProvider<B> + StorageProvider<B, BA> + CallApiAt<B>,
B: BlockT + Debug,
BA: ClientBackend<B>,
<<B as BlockT>::Header as HeaderT>::Number: From<u32>,
Expand All @@ -49,6 +54,19 @@ impl StorageCmd {
// Interesting part here:
// Read all the keys in the database and measure the time it takes to access each.
info!("Reading {} keys", keys.len());

// Read using the same TrieBackend and recorder for up to `batch_size` keys.
// This would allow us to measure the amortized cost of reading a key.
let recorder = (!self.params.disable_pov_recorder).then(|| Default::default());
let mut state = client
.state_at(best_hash)
.map_err(|_err| Error::Input("State not found".into()))?;
let mut as_trie_backend = state.as_trie_backend();
let mut backend = sp_state_machine::TrieBackendBuilder::wrap(&as_trie_backend)
.with_optional_recorder(recorder)
.build();
let mut read_in_batch = 0;

for key in keys.as_slice() {
match (self.params.include_child_trees, self.is_child_key(key.clone().0)) {
(true, Some(info)) => {
Expand All @@ -60,13 +78,31 @@ impl StorageCmd {
_ => {
// regular key
let start = Instant::now();
let v = client
.storage(best_hash, &key)

let v = backend
.storage(key.0.as_ref())
.expect("Checked above to exist")
.ok_or("Value unexpectedly empty")?;
record.append(v.0.len(), start.elapsed())?;
record.append(v.len(), start.elapsed())?;
},
}
read_in_batch += 1;
if read_in_batch >= self.params.batch_size {
// Using a new recorder for every read vs. reusing one for the entire batch
// produces significantly different results. Since in the real use case we use a
// single recorder per block, simulate the same behavior by creating a new
// recorder every batch size, so that the amortized cost of reading a key is
// measured in conditions closer to the real world.
let recorder = (!self.params.disable_pov_recorder).then(|| Default::default());
state = client
.state_at(best_hash)
.map_err(|_err| Error::Input("State not found".to_string()))?;
as_trie_backend = state.as_trie_backend();
backend = sp_state_machine::TrieBackendBuilder::wrap(&as_trie_backend)
.with_optional_recorder(recorder)
.build();
read_in_batch = 0;
}
}

if self.params.include_child_trees {
Expand All @@ -75,11 +111,29 @@ impl StorageCmd {
info!("Reading {} child keys", child_nodes.len());
for (key, info) in child_nodes.as_slice() {
let start = Instant::now();
let v = client
.child_storage(best_hash, info, key)
let v = backend
.child_storage(info, key.0.as_ref())
.expect("Checked above to exist")
.ok_or("Value unexpectedly empty")?;
record.append(v.0.len(), start.elapsed())?;
record.append(v.len(), start.elapsed())?;

read_in_batch += 1;
if read_in_batch >= self.params.batch_size {
// Using a new recorder for every read vs. reusing one for the entire batch
// produces significantly different results. Since in the real use case we use a
// single recorder per block, simulate the same behavior by creating a new
// recorder every batch size, so that the amortized cost of reading a key is
// measured in conditions closer to the real world.
let recorder = (!self.params.disable_pov_recorder).then(|| Default::default());
state = client
.state_at(best_hash)
.map_err(|_err| Error::Input("State not found".to_string()))?;
as_trie_backend = state.as_trie_backend();
backend = sp_state_machine::TrieBackendBuilder::wrap(&as_trie_backend)
.with_optional_recorder(recorder)
.build();
read_in_batch = 0;
}
}
}
Ok(record)
Expand Down
Loading
Loading