diff --git a/Cargo.lock b/Cargo.lock index a41378af..6a5c2d84 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1939,16 +1939,6 @@ dependencies = [ "rusticata-macros", ] -[[package]] -name = "deranged" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" -dependencies = [ - "powerfmt", - "serde", -] - [[package]] name = "derive_arbitrary" version = "1.3.2" @@ -2848,6 +2838,7 @@ dependencies = [ "libipld", "libp2p", "libp2p-bitswap", + "literally", "multiaddr 0.16.0", "num-traits", "openssl", @@ -2865,6 +2856,7 @@ dependencies = [ "tendermint-rpc", "tokio", "tower-abci", + "trace4rs", "tracing", "tracing-subscriber", ] @@ -5668,6 +5660,12 @@ version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "969488b55f8ac402214f3f5fd243ebb7206cf82de60d3172994707a4bcc2b829" +[[package]] +name = "literally" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0d2be3f5a0d4d5c983d1f8ecc2a87676a0875a14feb9eebf0675f7c3e2f3c35" + [[package]] name = "lock_api" version = "0.4.11" @@ -6257,6 +6255,15 @@ dependencies = [ "syn 2.0.39", ] +[[package]] +name = "num_threads" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" +dependencies = [ + "libc", +] + [[package]] name = "object" version = "0.29.0" @@ -6548,6 +6555,24 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" +[[package]] +name = "path-absolutize" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4af381fe79fa195b4909485d99f73a80792331df0625188e707854f0b3383f5" +dependencies = [ + "path-dedot", +] + +[[package]] +name = "path-dedot" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07ba0ad7e047712414213ff67533e6dd477af0a4e1d14fb52343e53d30ea9397" +dependencies = [ + "once_cell", +] + [[package]] name = "path-slash" version = "0.2.1" @@ -6892,12 +6917,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - [[package]] name = "ppv-lite86" version = "0.2.17" @@ -8360,6 +8379,17 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" +[[package]] +name = "smart-default" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "133659a15339456eeeb07572eb02a91c91e9815e9cbc89566944d2c8d3efdbf6" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "smoke-test" version = "0.1.0" @@ -9060,13 +9090,13 @@ dependencies = [ [[package]] name = "time" -version = "0.3.30" +version = "0.3.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" +checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" dependencies = [ - "deranged", "itoa", - "powerfmt", + "libc", + "num_threads", "serde", "time-core", "time-macros", @@ -9074,15 +9104,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.2" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" +checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" [[package]] name = "time-macros" -version = "0.2.15" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" +checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36" dependencies = [ "time-core", ] @@ -9368,6 +9398,52 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +[[package]] +name = "trace4rs" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79478dd8f523a9acec48b5e71ad56dc0f3b0dcaf703da29961d31a6bdd713bf0" +dependencies = [ + "camino", + "literally", + "log", + "once_cell", + "parking_lot 0.12.1", + "path-absolutize", + "regex", + "rustversion", + "thiserror", + "time", + "trace4rs-config", + "trace4rs-fmtorp", + "tracing", + "tracing-log", + "tracing-subscriber", + "utc-offset", +] + +[[package]] +name = "trace4rs-config" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54a8eca7b495cb7659cb081820b3f410b42b9edaecb28b0d7861cff6606fc882" +dependencies = [ + "literally", + "smart-default", + "thiserror", + "tracing", +] + +[[package]] +name = "trace4rs-fmtorp" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3bc666374e9f4d6962ba6bbf1f9d5f6a56d4a8c08c6d07985684b9078e8bf0e" +dependencies = [ + "thiserror", + "tracing-subscriber", +] + [[package]] name = "tracing" version = "0.1.40" @@ -9432,6 +9508,7 @@ dependencies = [ "sharded-slab", "smallvec", "thread_local", + "time", "tracing-core", "tracing-log", ] @@ -9685,6 +9762,18 @@ dependencies = [ "serde", ] +[[package]] +name = "utc-offset" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7f5187fd6893d08fecb6cdcb09badfbd5a187bf947fc12abc36c7bcb88dcd51" +dependencies = [ + "once_cell", + "parking_lot 0.12.1", + "thiserror", + "time", +] + [[package]] name = "utf-8" version = "0.7.6" diff --git a/Cargo.toml b/Cargo.toml index 6325a1dc..cff1bfcc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -85,6 +85,8 @@ tracing = "0.1" tracing-subscriber = "0.3" url = "2.4.1" zeroize = "1.6" +trace4rs = "0.5.1" +literally = "0.1.3" # Vendored for cross-compilation, see https://github.com/cross-rs/cross/wiki/Recipes#openssl openssl = { version = "0.10", features = ["vendored"] } diff --git a/fendermint/app/Cargo.toml b/fendermint/app/Cargo.toml index 07b87d63..c2ea05c0 100644 --- a/fendermint/app/Cargo.toml +++ b/fendermint/app/Cargo.toml @@ -33,6 +33,8 @@ tokio = { workspace = true } tower-abci = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true } +trace4rs = { workspace = true } +literally = { workspace = true } fendermint_abci = { path = "../abci" } fendermint_app_options = { path = "./options" } diff --git a/fendermint/app/options/src/lib.rs b/fendermint/app/options/src/lib.rs index bfcbf33c..0251d48b 100644 --- a/fendermint/app/options/src/lib.rs +++ b/fendermint/app/options/src/lib.rs @@ -75,6 +75,10 @@ pub struct Options { )] pub home_dir: PathBuf, + /// Set a custom directory for ipc log files. + #[arg(short = 'l', long, env = "FM_LOG_DIR")] + pub log_dir: Option, + /// Optionally override the default configuration. #[arg(short, long, default_value = "dev")] pub mode: String, diff --git a/fendermint/app/src/main.rs b/fendermint/app/src/main.rs index f27b9c5f..f89ec09c 100644 --- a/fendermint/app/src/main.rs +++ b/fendermint/app/src/main.rs @@ -1,10 +1,19 @@ // Copyright 2022-2023 Protocol Labs // SPDX-License-Identifier: Apache-2.0, MIT -use tracing_subscriber::FmtSubscriber; +use anyhow::{anyhow, Context}; +use std::path::PathBuf; +use std::str::FromStr; +use std::sync::Arc; +use trace4rs::config::{AppenderId, Policy, Target}; +use trace4rs::{ + config::{self, Config, Format}, + Handle, +}; pub use fendermint_app_options as options; pub use fendermint_app_settings as settings; +use fendermint_app_settings::expand_tilde; mod cmd; @@ -12,16 +21,8 @@ mod cmd; async fn main() { let opts = options::parse(); - // Log events to stdout. if let Some(level) = opts.tracing_level() { - // Writing to stderr so if we have output like JSON then we can pipe it to something else. - let subscriber = FmtSubscriber::builder() - .with_max_level(level) - .with_writer(std::io::stderr) - .finish(); - - tracing::subscriber::set_global_default(subscriber) - .expect("setting default subscriber failed"); + create_log(level, opts.log_dir.as_ref()).expect("cannot create logging"); } if let Err(e) = cmd::exec(&opts).await { @@ -30,6 +31,89 @@ async fn main() { } } +fn create_log(level: tracing::Level, log_dir: Option<&PathBuf>) -> anyhow::Result<()> { + let console_appender = config::Appender::Console; + + // default logging output info log to console + let default = config::Logger { + level: config::LevelFilter::from_str(level.as_str())?, + appenders: literally::hset! { AppenderId::from("console") }, + format: Format::default(), + }; + + let mut config = Config { + default: default.clone(), + loggers: literally::hmap! {}, + appenders: literally::hmap! {AppenderId::from("console") => console_appender}, + }; + + if let Some(log_dir) = log_dir { + let log_folder = expand_tilde(log_dir) + .to_str() + .ok_or_else(|| anyhow!("cannot parse log folder"))? + .to_string(); + std::fs::create_dir_all(&log_folder).context("cannot create log folder")?; + + let topdown_appender = config::Appender::RollingFile { + path: format!("{log_folder}/topdown.log"), + policy: Policy { + maximum_file_size: "10mb".to_string(), + // we keep the last 5 log files, older files will be deleted + max_size_roll_backups: 5, + pattern: None, + }, + }; + let debug_appender = config::Appender::RollingFile { + path: format!("{log_folder}/debug.log"), + policy: Policy { + maximum_file_size: "10mb".to_string(), + // we keep the last 5 log files, older files will be deleted + max_size_roll_backups: 5, + pattern: None, + }, + }; + + let topdown_logger = config::Logger { + level: config::LevelFilter::DEBUG, + appenders: literally::hset! { AppenderId::from("topdown") }, + format: Format::default(), + }; + let debug_logger = config::Logger { + level: config::LevelFilter::DEBUG, + appenders: literally::hset! { AppenderId::from("debug") }, + format: Format::default(), + }; + + config.loggers.insert( + Target::from("fendermint_vm_topdown"), + topdown_logger.clone(), + ); + config.loggers.insert( + Target::from("fendermint_vm_interpreter::chain"), + topdown_logger, + ); + config + .appenders + .insert(AppenderId::from("topdown"), topdown_appender); + + // maybe there are better ways to handle this, but * does not seem to be supported + config + .loggers + .insert(Target::from("fendermint"), debug_logger); + config.loggers.insert(Target::from("fendermint"), default); + config + .appenders + .insert(AppenderId::from("debug"), debug_appender); + } + + let handle = Arc::new(Handle::try_from(config)?); + + tracing::subscriber::set_global_default(handle.subscriber()) + .context("setting default subscriber failed")?; + + Ok(()) +} + #[cfg(test)] mod tests { use cid::Cid; diff --git a/fendermint/vm/interpreter/src/chain.rs b/fendermint/vm/interpreter/src/chain.rs index a05ae2e7..3bbc448c 100644 --- a/fendermint/vm/interpreter/src/chain.rs +++ b/fendermint/vm/interpreter/src/chain.rs @@ -242,6 +242,11 @@ where // commit parent finality first let finality = IPCParentFinality::new(p.height, p.block_hash); + tracing::debug!( + finality = finality.to_string(), + "chain interpreter received topdown exec proposal", + ); + let (prev_height, prev_finality) = topdown::commit_finality( &self.gateway_caller, &mut state, @@ -250,12 +255,26 @@ where ) .await .context("failed to commit finality")?; + tracing::debug!( + previous_committed_height = prev_height, + previous_committed_finality = prev_finality + .as_ref() + .map(|f| format!("{f}")) + .unwrap_or_else(|| String::from("None")), + "chain interpreter committed topdown finality", + ); // error happens if we cannot get the validator set from ipc agent after retries let validator_changes = provider .validator_changes_from(prev_height + 1, finality.height) .await .context("failed to fetch validator changes")?; + tracing::debug!( + from = prev_height + 1, + to = finality.height, + msgs = validator_changes.len(), + "chain interpreter received total validator changes" + ); self.gateway_caller .store_validator_changes(&mut state, validator_changes) @@ -267,22 +286,26 @@ where .await .context("failed to fetch top down messages")?; tracing::debug!( - "total topdown msgs: {} from {} to {}", - msgs.len(), - prev_height + 1, - p.height + number_of_messages = msgs.len(), + start = prev_height + 1, + end = p.height, + "chain interpreter received topdown msgs", ); let ret = topdown::execute_topdown_msgs(&self.gateway_caller, &mut state, msgs) .await .context("failed to execute top down messages")?; + tracing::debug!("chain interpreter applied topdown msgs"); atomically(|| { provider.set_new_finality(finality.clone(), prev_finality.clone()) }) .await; - tracing::debug!("new finality updated: {:?}", finality); + tracing::debug!( + finality = finality.to_string(), + "chain interpreter has set new" + ); Ok(((pool, provider, state), ChainMessageApplyRet::Ipc(ret))) } diff --git a/fendermint/vm/interpreter/src/fvm/state/ipc.rs b/fendermint/vm/interpreter/src/fvm/state/ipc.rs index 1aa1c73d..8155a10f 100644 --- a/fendermint/vm/interpreter/src/fvm/state/ipc.rs +++ b/fendermint/vm/interpreter/src/fvm/state/ipc.rs @@ -209,6 +209,10 @@ impl GatewayCaller { state: &mut FvmExecState, changes: Vec, ) -> anyhow::Result<()> { + if changes.is_empty() { + return Ok(()); + } + let mut change_requests = vec![]; for c in changes { change_requests.push(router::StakingChangeRequest::try_from(c)?); diff --git a/fendermint/vm/topdown/src/error.rs b/fendermint/vm/topdown/src/error.rs index b0780599..b826e351 100644 --- a/fendermint/vm/topdown/src/error.rs +++ b/fendermint/vm/topdown/src/error.rs @@ -15,4 +15,8 @@ pub enum Error { ParentChainReorgDetected, #[error("Cannot query parent at height {1}: {0}")] CannotQueryParent(String, BlockHeight), + /// This error happens when querying top down messages, the block ahead are all null rounds. + /// See `parent_views_at_height` for detailed explanation + #[error("Look ahead limit reached from {0}: {1}")] + LookAheadLimitReached(BlockHeight, BlockHeight), } diff --git a/fendermint/vm/topdown/src/finality/fetch.rs b/fendermint/vm/topdown/src/finality/fetch.rs index f8564d13..a0a7a349 100644 --- a/fendermint/vm/topdown/src/finality/fetch.rs +++ b/fendermint/vm/topdown/src/finality/fetch.rs @@ -40,7 +40,10 @@ macro_rules! retry { } tracing::warn!( - "cannot query ipc parent_client due to: {e}, retires: {retries}, wait: {wait:?}" + error = e.to_string(), + retries, + wait = ?wait, + "cannot query ipc parent_client" ); if retries > 0 { @@ -73,8 +76,9 @@ impl ParentViewProvider for CachedF for h in from..=to { let mut r = self.validator_changes(h).await?; tracing::debug!( - "obtained validator change set (len {}) at height {h}", - r.len() + number_of_messages = r.len(), + height = h, + "obtained validator change set", ); v.append(&mut r); } @@ -137,7 +141,11 @@ impl ParentViewProvider for CachedF let mut v = vec![]; for h in from..=to { let mut r = self.top_down_msgs(h, block_hash).await?; - tracing::debug!("obtained topdown messages (len {}) at height {h}", r.len()); + tracing::debug!( + number_of_top_down_messages = r.len(), + height = h, + "obtained topdown messages", + ); v.append(&mut r); } Ok(v) diff --git a/fendermint/vm/topdown/src/finality/null.rs b/fendermint/vm/topdown/src/finality/null.rs index b8ed892a..d549900b 100644 --- a/fendermint/vm/topdown/src/finality/null.rs +++ b/fendermint/vm/topdown/src/finality/null.rs @@ -106,7 +106,7 @@ impl FinalityWithNull { }; let proposal = IPCParentFinality { height, block_hash }; - tracing::debug!("new proposal: {proposal:?}"); + tracing::debug!(proposal = proposal.to_string(), "new proposal"); Ok(Some(proposal)) } @@ -160,7 +160,7 @@ impl FinalityWithNull { if let Some(i) = v.as_ref() { f(i) } else { - tracing::debug!("height: {height} is a null round, return default"); + tracing::debug!(height, "a null round detected, return default"); d() } })) @@ -188,11 +188,11 @@ impl FinalityWithNull { ) -> StmResult<(), Error> { if !top_down_msgs.is_empty() { // make sure incoming top down messages are ordered by nonce sequentially - tracing::debug!("top down messages: {top_down_msgs:#?}"); + tracing::debug!(?top_down_msgs); ensure_sequential(&top_down_msgs, |msg| msg.msg.nonce)?; }; if !validator_changes.is_empty() { - tracing::debug!("validator changes: {validator_changes:#?}"); + tracing::debug!(?validator_changes, "validator changes"); ensure_sequential(&validator_changes, |change| change.configuration_number)?; } diff --git a/fendermint/vm/topdown/src/lib.rs b/fendermint/vm/topdown/src/lib.rs index a837e82b..6a143160 100644 --- a/fendermint/vm/topdown/src/lib.rs +++ b/fendermint/vm/topdown/src/lib.rs @@ -76,6 +76,7 @@ impl Display for IPCParentFinality { ) } } + #[async_trait] pub trait ParentViewProvider { /// Obtain the genesis epoch of the current subnet in the parent diff --git a/fendermint/vm/topdown/src/sync.rs b/fendermint/vm/topdown/src/sync.rs index 6b1a7c63..2bab91ec 100644 --- a/fendermint/vm/topdown/src/sync.rs +++ b/fendermint/vm/topdown/src/sync.rs @@ -6,8 +6,8 @@ use crate::error::Error; use crate::finality::ParentViewPayload; use crate::proxy::{IPCProviderProxy, ParentQueryProxy}; use crate::{ - BlockHash, BlockHeight, CachedFinalityProvider, Config, IPCParentFinality, - ParentFinalityProvider, Toggle, NULL_ROUND_ERR_MSG, + is_null_round_str, BlockHash, BlockHeight, CachedFinalityProvider, Config, IPCParentFinality, + ParentFinalityProvider, Toggle, }; use async_stm::{atomically, atomically_or_err, Stm}; @@ -59,22 +59,22 @@ async fn query_starting_finality { - tracing::warn!("cannot get committed finality: {e}"); + tracing::warn!(error = e.to_string(), "cannot get committed finality"); tokio::time::sleep(Duration::from_secs(5)).await; continue; } }; - tracing::info!("latest finality committed: {finality}"); + tracing::info!(finality = finality.to_string(), "latest finality committed"); // this means there are no previous committed finality yet, we fetch from parent to get // the genesis epoch of the current subnet and its corresponding block hash. if finality.height == 0 { let genesis_epoch = parent_client.get_genesis_epoch().await?; - tracing::debug!("obtained genesis epoch: {genesis_epoch}"); + tracing::debug!(genesis_epoch = genesis_epoch, "obtained genesis epoch"); let r = parent_client.get_block_hash(genesis_epoch).await?; tracing::debug!( - "obtained genesis block hash: {:?}", - hex::encode(&r.block_hash) + block_hash = hex::encode(&r.block_hash), + "obtained genesis block hash", ); finality = IPCParentFinality { @@ -82,7 +82,8 @@ async fn query_starting_finality ending_height { + if last_recorded_height > max_ending_height { tracing::warn!( - "last recorded height: {last_recorded_height} more than ending height: {ending_height}" + last_recorded_height = last_recorded_height, + max_ending_height = max_ending_height, + "last recorded height more than max ending height" ); return reset_cache(parent_proxy, provider, query).await; } @@ -248,14 +256,22 @@ where // we are adding 1 to the height because we are fetching block by block, we also configured // the sequential cache to use increment == 1. let starting_height = last_recorded_height + 1; - let ending_height = min(ending_height, MAX_PARENT_VIEW_BLOCK_GAP + starting_height); - tracing::debug!("parent view range: {starting_height}-{ending_height}"); + let ending_height = min( + max_ending_height, + MAX_PARENT_VIEW_BLOCK_GAP + starting_height, + ); + tracing::debug!( + start = starting_height, + end = ending_height, + "parent view range" + ); let new_parent_views = parent_views_in_block_range( parent_proxy, last_height_hash, starting_height, ending_height, + max_ending_height, ) .await?; @@ -267,7 +283,7 @@ where }) .await?; - tracing::debug!("updated new parent views till height: {ending_height}"); + tracing::debug!(height = ending_height, "updated new parent views to"); Ok(()) } @@ -315,7 +331,7 @@ async fn last_recorded_data( LastRecordedBlock::Empty => Ok(None), LastRecordedBlock::FilledBlock { height, hash } => Ok(Some((height, hash))), LastRecordedBlock::NullBlock(height) => { - tracing::info!("last recorded height {height} is a null block"); + tracing::info!(height, "last recorded height is a null block"); // Imaging the list of blocks as follows: // @@ -340,7 +356,11 @@ async fn last_recorded_data( match atomically(|| provider.first_non_null_parent_hash(height)).await { None => unreachable!("should have last committed finality at this point"), Some(hash) => { - tracing::info!("First non null parent hash: {hash:02x?} at height: {height}"); + tracing::info!( + block_height = height, + parent_hash = hex::encode(&hash), + "First non null parent", + ); Ok(Some((height, hash))) } } @@ -374,19 +394,21 @@ async fn parent_views_in_block_range( mut previous_hash: BlockHash, start_height: BlockHeight, end_height: BlockHeight, + max_ending_height: BlockHeight, ) -> Result { let mut updates = vec![]; let mut total_top_down_msgs = 0; for h in start_height..=end_height { - match parent_views_at_height(parent_proxy, &previous_hash, h, end_height).await { + match parent_views_at_height(parent_proxy, &previous_hash, h, max_ending_height).await { Ok((hash, changeset, cross_msgs)) => { total_top_down_msgs += cross_msgs.len(); tracing::debug!( - "previous previous hash: {:02x?}, previous hash: {:02x?}", - previous_hash, - hash + height = h, + previous_previous_hahs = hex::encode(&previous_hash), + previous_hash = hex::encode(&hash), + "matching hashes", ); previous_hash = hash.clone(); @@ -403,9 +425,16 @@ async fn parent_views_in_block_range( // A null round will never have a block, which means that we can advance to the next height. Err(e) => { let err_msg = e.to_string(); - if err_msg.contains(NULL_ROUND_ERR_MSG) { - tracing::warn!("null round at height: {h} detected, skip"); + if is_null_round_str(&err_msg) { + tracing::warn!(height = h, "null round detected, skip"); updates.push((h, None)); + } else if let Error::LookAheadLimitReached(start, limit) = e { + tracing::warn!( + start_height = start, + limit_height = limit, + "look ahead limit reached, store updates so far in cache", + ); + break; } else { return Err(e); } @@ -413,17 +442,26 @@ async fn parent_views_in_block_range( } } - tracing::debug!("obtained updates: {updates:?}"); + tracing::debug!(?updates, "obtained parent view updates"); Ok(updates) } -/// Obtain the new parent views for the target height +/// Obtain the new parent views for the target height. +/// +/// For `max_ending_height`, the explanation is as follows: +/// Say the current height is h and we need to fetch the top down messages. For `lotus`, the state +/// at height h is only finalized at h + 1. The block hash at height h will return empty top down +/// messages. In this case, we need to get the block hash at height h + 1 to query the top down messages. +/// Sadly, the height h + 1 could be null block, we need to continuously look ahead until we found +/// a height that is not null. But we cannot go all the way to the block head as it's not considered +/// final yet. So we need to use a `max_ending_height` that restricts how far as head we should go. +/// If we still cannot find a height that is non-null, maybe we should try later async fn parent_views_at_height( parent_proxy: &Arc, previous_hash: &BlockHash, height: BlockHeight, - look_ahead_limit: BlockHeight, + max_ending_height: BlockHeight, ) -> Result { let block_hash_res = parent_proxy .get_block_hash(height) @@ -431,9 +469,11 @@ async fn parent_views_at_height( .map_err(|e| Error::CannotQueryParent(e.to_string(), height))?; if block_hash_res.parent_block_hash != *previous_hash { tracing::warn!( - "parent block hash at {height} is {:02x?} diff than previous hash: {previous_hash:02x?}", - block_hash_res.parent_block_hash - ); + height, + parent_hash = hex::encode(&block_hash_res.parent_block_hash), + previous_hash = hex::encode(previous_hash), + "parent block hash diff than previous hash", + ); return Err(Error::ParentChainReorgDetected); } @@ -443,9 +483,10 @@ async fn parent_views_at_height( .map_err(|e| Error::CannotQueryParent(e.to_string(), height))?; if changes_res.block_hash != block_hash_res.block_hash { tracing::warn!( - "change set block hash at {height} is {:02x?} diff than hash: {:02x?}", - block_hash_res.parent_block_hash, - block_hash_res.block_hash + height, + change_set_hash = hex::encode(&changes_res.block_hash), + block_hash = hex::encode(&block_hash_res.block_hash), + "change set block hash does not equal block hash", ); return Err(Error::ParentChainReorgDetected); } @@ -455,13 +496,13 @@ async fn parent_views_at_height( // the block hash at height h + 1 to query the top down messages. // Sadly, the height h + 1 could be null block, we need to continuously look ahead // until we found a height that is not null - let next_hash = next_block_hash(parent_proxy, height + 1, look_ahead_limit).await?; + let next_hash = first_non_null_block_hash(parent_proxy, height + 1, max_ending_height).await?; if next_hash.parent_block_hash != block_hash_res.block_hash { tracing::warn!( - "next block hash at {} is {:02x?} diff than hash: {:02x?}", - height + 1, - next_hash.parent_block_hash, - block_hash_res.block_hash + next_block_height = height + 1, + next_block_parent = hex::encode(&next_hash.parent_block_hash), + block_hash = hex::encode(&block_hash_res.block_hash), + "next block's parent hash does not equal block hash", ); return Err(Error::ParentChainReorgDetected); } @@ -477,24 +518,26 @@ async fn parent_views_at_height( )) } -/// Get the next non-null block hash. If height is a null round, then we need to look further util we -/// find one that is not null round. -async fn next_block_hash( +/// Get the first non-null block hash in between heights. If height is a null round, then we need +/// to look further util we find one that is not null round. +async fn first_non_null_block_hash( parent_proxy: &Arc, - height: BlockHeight, - mut look_ahead_limit: BlockHeight, + start: BlockHeight, + mut end: BlockHeight, ) -> Result { // at least we run for height - look_ahead_limit = max(height, look_ahead_limit); + end = max(start, end); - for h in height..=look_ahead_limit { + for h in start..=end { match parent_proxy.get_block_hash(h).await { Ok(h) => return Ok(h), Err(e) => { let msg = e.to_string(); - if msg.contains(NULL_ROUND_ERR_MSG) { + if is_null_round_str(&msg) { tracing::warn!( - "look ahead encountered error: height {h} is a null round, message: {e}" + height = h, + error = e.to_string(), + "look ahead height is a null round" ); continue; } else { @@ -503,11 +546,5 @@ async fn next_block_hash( } } } - Err(Error::CannotQueryParent( - format!( - "cannot get next block hash in range {}-{}, check your parent chain", - height, look_ahead_limit - ), - look_ahead_limit, - )) + Err(Error::LookAheadLimitReached(start, end)) } diff --git a/infra/scripts/fendermint.toml b/infra/scripts/fendermint.toml index 9871454a..6d1a89c5 100644 --- a/infra/scripts/fendermint.toml +++ b/infra/scripts/fendermint.toml @@ -19,6 +19,7 @@ docker run \ --network ${NETWORK_NAME} \ --volume ${BASE_DIR}:/data \ --env FM_DATA_DIR=/data/${NODE_NAME}/fendermint/data \ + --env FM_LOG_DIR=/data/${NODE_NAME}/fendermint/data/logs \ --env FM_ABCI__LISTEN__HOST=0.0.0.0 \ --env FM_ETH__LISTEN__HOST=0.0.0.0 \ --env FM_CHAIN_NAME=${NETWORK_NAME} \ @@ -46,6 +47,7 @@ docker run \ --network ${NETWORK_NAME} \ --volume ${BASE_DIR}:/data \ --env FM_DATA_DIR=/data/${NODE_NAME}/fendermint/data \ + --env FM_LOG_DIR=/data/${NODE_NAME}/fendermint/data/logs \ --env FM_CHAIN_NAME=${NETWORK_NAME} \ --env FM_IPC__SUBNET_ID=${SUBNET_ID} \ --env FM_IPC__TOPDOWN__CHAIN_HEAD_DELAY=${TOPDOWN_CHAIN_HEAD_DELAY} \ @@ -86,6 +88,7 @@ docker run \ --network ${NETWORK_NAME} \ --volume ${BASE_DIR}:/data \ --env FM_DATA_DIR=/data/${NODE_NAME}/fendermint/data \ + --env FM_LOG_DIR=/data/${NODE_NAME}/fendermint/data/logs \ --env FM_CHAIN_NAME=${NETWORK_NAME} \ --env FM_IPC__SUBNET_ID=${SUBNET_ID} \ --env FM_IPC__TOPDOWN__CHAIN_HEAD_DELAY=${TOPDOWN_CHAIN_HEAD_DELAY} \