From 8718ba17b8a17ecf75c46870290924e0509b235a Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Tue, 12 Jul 2022 19:01:21 +0300 Subject: [PATCH 01/45] Draft collator side changes --- node/collation-generation/src/lib.rs | 10 +- .../src/collator_side/mod.rs | 323 +++++++++++++----- node/network/collator-protocol/src/error.rs | 18 +- .../src/validator_side/mod.rs | 2 +- node/overseer/src/lib.rs | 2 + node/subsystem-types/src/messages.rs | 8 +- .../src/backing_implicit_view.rs | 43 ++- 7 files changed, 311 insertions(+), 95 deletions(-) diff --git a/node/collation-generation/src/lib.rs b/node/collation-generation/src/lib.rs index 500b500636ba..c68ba26acc0b 100644 --- a/node/collation-generation/src/lib.rs +++ b/node/collation-generation/src/lib.rs @@ -286,6 +286,7 @@ async fn handle_new_activations( "collation-builder", Box::pin(async move { let persisted_validation_data_hash = validation_data.hash(); + let parent_head_data_hash = validation_data.parent_head.hash(); let (collation, result_sender) = match (task_config.collator)(relay_parent, &validation_data).await { @@ -385,8 +386,13 @@ async fn handle_new_activations( if let Err(err) = task_sender .send( - CollatorProtocolMessage::DistributeCollation(ccr, pov, result_sender) - .into(), + CollatorProtocolMessage::DistributeCollation( + ccr, + parent_head_data_hash, + pov, + result_sender, + ) + .into(), ) .await { diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index 66b404551c52..d34a776af200 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -39,11 +39,13 @@ use polkadot_node_primitives::{CollationSecondedSignal, PoV, Statement}; use polkadot_node_subsystem::{ jaeger, messages::{ - CollatorProtocolMessage, NetworkBridgeEvent, NetworkBridgeMessage, RuntimeApiMessage, + CollatorProtocolMessage, HypotheticalDepthRequest, NetworkBridgeEvent, + 
NetworkBridgeMessage, ProspectiveParachainsMessage, RuntimeApiMessage, RuntimeApiRequest, }, - overseer, FromOrchestra, OverseerSignal, PerLeafSpan, + overseer, CollatorProtocolSenderTrait, FromOrchestra, OverseerSignal, PerLeafSpan, }; use polkadot_node_subsystem_util::{ + backing_implicit_view::View as ImplicitView, metrics::{self, prometheus}, runtime::{get_availability_cores, get_group_rotation_info, RuntimeInfo}, TimeoutExt, @@ -251,6 +253,7 @@ impl CollationStatus { /// A collation built by the collator. struct Collation { receipt: CandidateReceipt, + parent_head_data_hash: Hash, pov: PoV, status: CollationStatus, } @@ -272,6 +275,56 @@ struct WaitingCollationFetches { type ActiveCollationFetches = FuturesUnordered + Send + 'static>>>; +#[derive(Debug, Clone, Copy, PartialEq)] +enum ProspectiveParachainsMode { + // v2 runtime API: no prospective parachains. + Disabled, + // vstaging runtime API: prospective parachains. + Enabled, +} + +impl ProspectiveParachainsMode { + fn is_enabled(&self) -> bool { + matches!(self, Self::Enabled) + } +} + +async fn prospective_parachains_mode( + sender: &mut Sender, + leaf_hash: Hash, +) -> Result +where + Sender: CollatorProtocolSenderTrait, +{ + // TODO: call a Runtime API once staging version is available + // https://github.com/paritytech/substrate/discussions/11338 + // + // Implementation should be shared with backing & provisioner. + + let (tx, rx) = oneshot::channel(); + sender + .send_message(RuntimeApiMessage::Request(leaf_hash, RuntimeApiRequest::Version(tx))) + .await; + + let version = rx + .await + .map_err(Error::CancelledRuntimeApiVersion)? + .map_err(Error::RuntimeApi)?; + + if version == 3 { + Ok(ProspectiveParachainsMode::Enabled) + } else { + if version != 2 { + gum::warn!( + target: LOG_TARGET, + "Runtime API version is {}, expected 2 or 3. Prospective parachains are disabled", + version + ); + } + Ok(ProspectiveParachainsMode::Disabled) + } +} + struct State { /// Our network peer id. 
local_peer_id: PeerId, @@ -287,8 +340,21 @@ struct State { /// to determine what is relevant to them. peer_views: HashMap, - /// Our own view. - view: OurView, + /// Leaves that do support asynchronous backing along with + /// implicit ancestry. Leaves from the implicit view are present in + /// `active_leaves`, the opposite doesn't hold true. + /// + /// Relay-chain blocks which don't support prospective parachains are + /// never included in the fragment trees of active leaves which do. In + /// particular, this means that if a given relay parent belongs to implicit + /// ancestry of some active leaf, then it does support prospective parachains. + implicit_view: ImplicitView, + + /// All active leaves observed by us, including both that do and do not + /// support prospective parachains. This mapping works as a replacement for + /// [`polkadot_node_network_protocol::View`] and can be dropped once the transition + /// to asynchronous backing is done. + active_leaves: HashMap, /// Span per relay parent. span_per_relay_parent: HashMap, @@ -333,7 +399,8 @@ impl State { metrics, collating_on: Default::default(), peer_views: Default::default(), - view: Default::default(), + implicit_view: Default::default(), + active_leaves: Default::default(), span_per_relay_parent: Default::default(), collations: Default::default(), collation_result_senders: Default::default(), @@ -345,10 +412,23 @@ impl State { } /// Get all peers which have the given relay parent in their view. 
- fn peers_interested_in_leaf(&self, relay_parent: &Hash) -> Vec { + fn peers_interested_in_relay_parent( + &self, + para_id: ParaId, + relay_parent: &Hash, + relay_parent_mode: ProspectiveParachainsMode, + ) -> Vec { self.peer_views .iter() - .filter(|(_, v)| v.contains(relay_parent)) + .filter(|(_, v)| match relay_parent_mode { + ProspectiveParachainsMode::Disabled => v.contains(relay_parent), + ProspectiveParachainsMode::Enabled => v.iter().any(|block_hash| { + self.implicit_view + .known_allowed_relay_parents_under(block_hash, Some(para_id)) + .unwrap_or_default() + .contains(relay_parent) + }), + }) .map(|(peer, _)| *peer) .collect() } @@ -369,27 +449,44 @@ async fn distribute_collation( state: &mut State, id: ParaId, receipt: CandidateReceipt, + parent_head_data_hash: Hash, pov: PoV, result_sender: Option>, ) -> Result<()> { - let relay_parent = receipt.descriptor.relay_parent; - - // This collation is not in the active-leaves set. - if !state.view.contains(&relay_parent) { - gum::warn!( - target: LOG_TARGET, - ?relay_parent, - "distribute collation message parent is outside of our view", - ); + let candidate_relay_parent = receipt.descriptor.relay_parent; + let candidate_hash = receipt.hash(); - return Ok(()) - } + let relay_parent_mode = match state.active_leaves.get(&candidate_relay_parent) { + Some(mode) => *mode, + None => { + // If candidate relay parent is not an active leaf, assume + // it's part of implicit ancestry. + if state.active_leaves.keys().any(|block_hash| { + state + .implicit_view + .known_allowed_relay_parents_under(block_hash, Some(id)) + .unwrap_or_default() + .contains(&candidate_relay_parent) + }) { + ProspectiveParachainsMode::Enabled + } else { + gum::debug!( + target: LOG_TARGET, + para_id = %id, + candidate_relay_parent = %candidate_relay_parent, + candidate_hash = ?candidate_hash, + "Candidate relay parent is out of our view", + ); + return Ok(()) + } + }, + }; // We have already seen collation for this relay parent. 
- if state.collations.contains_key(&relay_parent) { + if state.collations.contains_key(&candidate_relay_parent) { gum::debug!( target: LOG_TARGET, - ?relay_parent, + ?candidate_relay_parent, "Already seen collation for this relay parent", ); return Ok(()) @@ -397,23 +494,26 @@ async fn distribute_collation( // Determine which core the para collated-on is assigned to. // If it is not scheduled then ignore the message. - let (our_core, num_cores) = match determine_core(ctx.sender(), id, relay_parent).await? { - Some(core) => core, - None => { - gum::warn!( - target: LOG_TARGET, - para_id = %id, - ?relay_parent, - "looks like no core is assigned to {} at {}", id, relay_parent, - ); + let (our_core, num_cores) = + match determine_core(ctx.sender(), id, candidate_relay_parent, relay_parent_mode).await? { + Some(core) => core, + None => { + gum::warn!( + target: LOG_TARGET, + para_id = %id, + "looks like no core is assigned to {} at {}", id, candidate_relay_parent, + ); - return Ok(()) - }, - }; + return Ok(()) + }, + }; // Determine the group on that core. + // + // When prospective parachains are disabled, candidate relay parent here is + // guaranteed to be an active leaf. 
let current_validators = - determine_our_validators(ctx, runtime, our_core, num_cores, relay_parent).await?; + determine_our_validators(ctx, runtime, our_core, num_cores, candidate_relay_parent).await?; if current_validators.validators.is_empty() { gum::warn!( @@ -428,8 +528,9 @@ async fn distribute_collation( gum::debug!( target: LOG_TARGET, para_id = %id, - relay_parent = %relay_parent, - candidate_hash = ?receipt.hash(), + candidate_relay_parent = %candidate_relay_parent, + relay_parent_mode = ?relay_parent_mode, + candidate_hash = ?candidate_hash, pov_hash = ?pov.hash(), core = ?our_core, ?current_validators, @@ -439,20 +540,27 @@ async fn distribute_collation( // Issue a discovery request for the validators of the current group: connect_to_validators(ctx, current_validators.validators.into_iter().collect()).await; - state.our_validators_groups.insert(relay_parent, ValidatorGroup::new()); + state + .our_validators_groups + .insert(candidate_relay_parent, ValidatorGroup::new()); if let Some(result_sender) = result_sender { - state.collation_result_senders.insert(receipt.hash(), result_sender); + state.collation_result_senders.insert(candidate_hash, result_sender); } - state - .collations - .insert(relay_parent, Collation { receipt, pov, status: CollationStatus::Created }); + state.collations.insert( + candidate_relay_parent, + Collation { receipt, parent_head_data_hash, pov, status: CollationStatus::Created }, + ); + + // It's collation-producer responsibility to verify that there exists + // a hypothetical membership in a fragment tree for candidate. 
+ let interested = + state.peers_interested_in_relay_parent(id, &candidate_relay_parent, relay_parent_mode); - let interested = state.peers_interested_in_leaf(&relay_parent); // Make sure already connected peers get collations: for peer_id in interested { - advertise_collation(ctx, state, relay_parent, peer_id).await; + advertise_collation(ctx, state, candidate_relay_parent, peer_id).await; } Ok(()) @@ -464,14 +572,26 @@ async fn determine_core( sender: &mut impl overseer::SubsystemSender, para_id: ParaId, relay_parent: Hash, + relay_parent_mode: ProspectiveParachainsMode, ) -> Result> { let cores = get_availability_cores(sender, relay_parent).await?; for (idx, core) in cores.iter().enumerate() { - if let CoreState::Scheduled(occupied) = core { - if occupied.para_id == para_id { - return Ok(Some(((idx as u32).into(), cores.len()))) - } + let core_para_id = match core { + CoreState::Scheduled(scheduled) => Some(scheduled.para_id), + CoreState::Occupied(occupied) => + if relay_parent_mode.is_enabled() { + // With async backing we don't care about the core state, + // it is only needed for figuring our validators group. 
+ Some(occupied.candidate_descriptor.para_id) + } else { + None + }, + CoreState::Free => None, + }; + + if core_para_id == Some(para_id) { + return Ok(Some(((idx as u32).into(), cores.len()))) } } @@ -635,12 +755,13 @@ async fn process_msg( CollateOn(id) => { state.collating_on = Some(id); }, - DistributeCollation(receipt, pov, result_sender) => { + DistributeCollation(receipt, parent_head_data_hash, pov, result_sender) => { let _span1 = state .span_per_relay_parent .get(&receipt.descriptor.relay_parent) .map(|s| s.child("distributing-collation")); let _span2 = jaeger::Span::new(&pov, "distributing-collation"); + match state.collating_on { Some(id) if receipt.descriptor.para_id != id => { // If the ParaId of a collation requested to be distributed does not match @@ -654,8 +775,17 @@ async fn process_msg( }, Some(id) => { let _ = state.metrics.time_collation_distribution("distribute"); - distribute_collation(ctx, runtime, state, id, receipt, pov, result_sender) - .await?; + distribute_collation( + ctx, + runtime, + state, + id, + receipt, + parent_head_data_hash, + pov, + result_sender, + ) + .await?; }, None => { gum::warn!( @@ -941,7 +1071,7 @@ async fn handle_network_msg( }, OurViewChange(view) => { gum::trace!(target: LOG_TARGET, ?view, "Own view change"); - handle_our_view_change(state, view).await?; + handle_our_view_change(ctx.sender(), state, view).await?; }, PeerMessage(remote, Versioned::V1(msg)) => { handle_incoming_peer_message(ctx, runtime, state, remote, msg).await?; @@ -955,41 +1085,74 @@ async fn handle_network_msg( } /// Handles our view changes. 
-async fn handle_our_view_change(state: &mut State, view: OurView) -> Result<()> { - for removed in state.view.difference(&view) { - gum::debug!(target: LOG_TARGET, relay_parent = ?removed, "Removing relay parent because our view changed."); +async fn handle_our_view_change( + sender: &mut Sender, + state: &mut State, + view: OurView, +) -> Result<()> +where + Sender: CollatorProtocolSenderTrait, +{ + let current_leaves = state.active_leaves.clone(); + + let removed = current_leaves.iter().filter(|(h, _)| !view.contains(*h)); + let added = view.iter().filter(|h| !current_leaves.contains_key(h)); + + for leaf in added { + let mode = prospective_parachains_mode(sender, *leaf).await?; + + state.active_leaves.insert(*leaf, mode); + + if mode.is_enabled() { + state + .implicit_view + .activate_leaf(sender, *leaf) + .await + .map_err(Error::ImplicitViewFetchError)?; + } + } - if let Some(collation) = state.collations.remove(removed) { - state.collation_result_senders.remove(&collation.receipt.hash()); + for (leaf, mode) in removed { + // If the leaf is deactivated it still may stay in the view as a part + // of implicit ancestry. Only update the state after the hash is actually + // pruned from the block info storage. 
+ let pruned = if mode.is_enabled() { + state.implicit_view.deactivate_leaf(*leaf) + } else { + vec![*leaf] + }; - match collation.status { - CollationStatus::Created => gum::warn!( - target: LOG_TARGET, - candidate_hash = ?collation.receipt.hash(), - pov_hash = ?collation.pov.hash(), - "Collation wasn't advertised to any validator.", - ), - CollationStatus::Advertised => gum::debug!( - target: LOG_TARGET, - candidate_hash = ?collation.receipt.hash(), - pov_hash = ?collation.pov.hash(), - "Collation was advertised but not requested by any validator.", - ), - CollationStatus::Requested => gum::debug!( - target: LOG_TARGET, - candidate_hash = ?collation.receipt.hash(), - pov_hash = ?collation.pov.hash(), - "Collation was requested.", - ), + for removed in &pruned { + gum::debug!(target: LOG_TARGET, relay_parent = ?removed, "Removing relay parent because our view changed."); + if let Some(collation) = state.collations.remove(removed) { + state.collation_result_senders.remove(&collation.receipt.hash()); + + match collation.status { + CollationStatus::Created => gum::warn!( + target: LOG_TARGET, + candidate_hash = ?collation.receipt.hash(), + pov_hash = ?collation.pov.hash(), + "Collation wasn't advertised to any validator.", + ), + CollationStatus::Advertised => gum::debug!( + target: LOG_TARGET, + candidate_hash = ?collation.receipt.hash(), + pov_hash = ?collation.pov.hash(), + "Collation was advertised but not requested by any validator.", + ), + CollationStatus::Requested => gum::debug!( + target: LOG_TARGET, + candidate_hash = ?collation.receipt.hash(), + pov_hash = ?collation.pov.hash(), + "Collation was requested.", + ), + } } + state.our_validators_groups.remove(removed); + state.span_per_relay_parent.remove(removed); + state.waiting_collation_fetches.remove(removed); } - state.our_validators_groups.remove(removed); - state.span_per_relay_parent.remove(removed); - state.waiting_collation_fetches.remove(removed); } - - state.view = view; - Ok(()) } diff --git 
a/node/network/collator-protocol/src/error.rs b/node/network/collator-protocol/src/error.rs index b1c86fa81c5a..233f747bbcaf 100644 --- a/node/network/collator-protocol/src/error.rs +++ b/node/network/collator-protocol/src/error.rs @@ -17,10 +17,12 @@ //! Error handling related code and Error/Result definitions. +use futures::channel::oneshot; + use polkadot_node_network_protocol::request_response::incoming; use polkadot_node_primitives::UncheckedSignedFullStatement; -use polkadot_node_subsystem::errors::SubsystemError; -use polkadot_node_subsystem_util::runtime; +use polkadot_node_subsystem::{errors::SubsystemError, RuntimeApiError}; +use polkadot_node_subsystem_util::{backing_implicit_view, runtime}; use crate::LOG_TARGET; @@ -44,6 +46,18 @@ pub enum Error { #[error("Error while accessing runtime information")] Runtime(#[from] runtime::Error), + #[error("Error while accessing Runtime API")] + RuntimeApi(#[from] RuntimeApiError), + + #[error(transparent)] + ImplicitViewFetchError(backing_implicit_view::FetchError), + + #[error("Response receiver for hypothetical depth request cancelled")] + CancelledGetHypotheticalDepth(oneshot::Canceled), + + #[error("Response receiver for Runtime API version request cancelled")] + CancelledRuntimeApiVersion(oneshot::Canceled), + #[error("CollationSeconded contained statement with invalid signature")] InvalidStatementSignature(UncheckedSignedFullStatement), } diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index 30ff333b40fb..0e6816d7bfa9 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -1109,7 +1109,7 @@ async fn process_msg( "CollateOn message is not expected on the validator side of the protocol", ); }, - DistributeCollation(_, _, _) => { + DistributeCollation(..) 
=> { gum::warn!( target: LOG_TARGET, "DistributeCollation message is not expected on the validator side of the protocol", diff --git a/node/overseer/src/lib.rs b/node/overseer/src/lib.rs index 9e95ca8af03e..6c97c96af7ed 100644 --- a/node/overseer/src/lib.rs +++ b/node/overseer/src/lib.rs @@ -556,6 +556,8 @@ pub struct Overseer { NetworkBridgeMessage, RuntimeApiMessage, CandidateBackingMessage, + ChainApiMessage, + ProspectiveParachainsMessage ])] collator_protocol: CollatorProtocol, diff --git a/node/subsystem-types/src/messages.rs b/node/subsystem-types/src/messages.rs index 9652cff20ba7..85adc5d8f4d3 100644 --- a/node/subsystem-types/src/messages.rs +++ b/node/subsystem-types/src/messages.rs @@ -195,10 +195,16 @@ pub enum CollatorProtocolMessage { /// This should be sent before any `DistributeCollation` message. CollateOn(ParaId), /// Provide a collation to distribute to validators with an optional result sender. + /// The second argument is the hash of parachain head before candidate's execution. /// /// The result sender should be informed when at least one parachain validator seconded the collation. It is also /// completely okay to just drop the sender. - DistributeCollation(CandidateReceipt, PoV, Option>), + DistributeCollation( + CandidateReceipt, + Hash, + PoV, + Option>, + ), /// Report a collator as having provided an invalid collation. This should lead to disconnect /// and blacklist of the collator. ReportCollator(CollatorId), diff --git a/node/subsystem-util/src/backing_implicit_view.rs b/node/subsystem-util/src/backing_implicit_view.rs index dc10efe519fe..d3734e73c14f 100644 --- a/node/subsystem-util/src/backing_implicit_view.rs +++ b/node/subsystem-util/src/backing_implicit_view.rs @@ -100,6 +100,11 @@ struct BlockInfo { } impl View { + /// Get an iterator over active leaves in the view. + pub fn leaves<'a>(&'a self) -> impl Iterator + 'a { + self.leaves.keys() + } + /// Activate a leaf in the view. 
/// This will request the minimum relay parents from the /// Prospective Parachains subsystem for each leaf and will load headers in the ancestry of each @@ -152,9 +157,13 @@ impl View { } /// Deactivate a leaf in the view. This prunes any outdated implicit ancestors as well. - pub fn deactivate_leaf(&mut self, leaf_hash: Hash) { + /// + /// Returns hashes of blocks pruned from storage. + pub fn deactivate_leaf(&mut self, leaf_hash: Hash) -> Vec { + let mut removed = Vec::new(); + if self.leaves.remove(&leaf_hash).is_none() { - return + return removed } // Prune everything before the minimum out of all leaves, @@ -165,8 +174,15 @@ impl View { { let minimum = self.leaves.values().map(|l| l.retain_minimum).min(); - self.block_info_storage - .retain(|_, i| minimum.map_or(false, |m| i.block_number >= m)); + self.block_info_storage.retain(|hash, i| { + let keep = minimum.map_or(false, |m| i.block_number >= m); + if !keep { + removed.push(*hash); + } + keep + }); + + removed } } @@ -212,17 +228,26 @@ impl View { } /// Errors when fetching a leaf and associated ancestry. -#[derive(Debug)] +#[fatality::fatality] pub enum FetchError { - /// Leaf was already known. + /// Activated leaf is already present in view. + #[error("Leaf was already known")] AlreadyKnown, - /// The prospective parachains subsystem was unavailable. + + /// Request to the prospective parachains subsystem failed. + #[error("The prospective parachains subsystem was unavailable")] ProspectiveParachainsUnavailable, - /// A block header was unavailable. + + /// Failed to fetch the block header. + #[error("A block header was unavailable")] BlockHeaderUnavailable(Hash, BlockHeaderUnavailableReason), + /// A block header was unavailable due to a chain API error. + #[error("A block header was unavailable due to a chain API error")] ChainApiError(Hash, ChainApiError), - /// The chain API subsystem was unavailable. + + /// Request to the Chain API subsystem failed. 
+ #[error("The chain API subsystem was unavailable")] ChainApiUnavailable, } From 943103cd3b08207d686c42e1b51b52ec70435c68 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Wed, 13 Jul 2022 23:54:44 +0300 Subject: [PATCH 02/45] Start working on collations management --- Cargo.lock | 2 + node/network/collator-protocol/Cargo.toml | 2 + .../src/collator_side/mod.rs | 195 ++++++++++++------ 3 files changed, 138 insertions(+), 61 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a51ee4e6ce59..1fb6fd157643 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6360,7 +6360,9 @@ name = "polkadot-collator-protocol" version = "0.9.25" dependencies = [ "always-assert", + "arrayvec 0.5.2", "assert_matches", + "bitvec 1.0.0", "env_logger", "fatality", "futures", diff --git a/node/network/collator-protocol/Cargo.toml b/node/network/collator-protocol/Cargo.toml index 46a4f9800602..fa3deb4c7d00 100644 --- a/node/network/collator-protocol/Cargo.toml +++ b/node/network/collator-protocol/Cargo.toml @@ -9,6 +9,8 @@ always-assert = "0.1.2" futures = "0.3.21" futures-timer = "3" gum = { package = "tracing-gum", path = "../../gum" } +bitvec = { version = "1.0.0", default-features = false, features = ["alloc"] } +arrayvec = "0.5.2" sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-runtime = { git = "https://github.com/paritytech/substrate", branch = "master" } diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index d34a776af200..9b0ceb7ee1b4 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -20,6 +20,7 @@ use std::{ time::Duration, }; +use bitvec::prelude::*; use futures::{ channel::oneshot, pin_mut, select, stream::FuturesUnordered, Future, FutureExt, StreamExt, }; @@ -39,8 +40,8 @@ use polkadot_node_primitives::{CollationSecondedSignal, PoV, Statement}; use polkadot_node_subsystem::{ jaeger, 
messages::{ - CollatorProtocolMessage, HypotheticalDepthRequest, NetworkBridgeEvent, - NetworkBridgeMessage, ProspectiveParachainsMessage, RuntimeApiMessage, RuntimeApiRequest, + CollatorProtocolMessage, NetworkBridgeEvent, NetworkBridgeMessage, RuntimeApiMessage, + RuntimeApiRequest, }, overseer, CollatorProtocolSenderTrait, FromOrchestra, OverseerSignal, PerLeafSpan, }; @@ -75,6 +76,15 @@ const COST_APPARENT_FLOOD: Rep = /// For considerations on this value, see: https://github.com/paritytech/polkadot/issues/4386 const MAX_UNSHARED_UPLOAD_TIME: Duration = Duration::from_millis(150); +/// The maximum depth a candidate can occupy for any relay parent. +/// 'depth' is defined as the amount of blocks between the para +/// head in a relay-chain block's state and a candidate with a +/// particular relay-parent. +/// +/// This value is only used for limiting the number of candidates +/// we accept and distribute per relay parent. +const MAX_CANDIDATE_DEPTH: usize = 4; + #[derive(Clone, Default)] pub struct Metrics(Option); @@ -184,42 +194,67 @@ impl metrics::Metrics for Metrics { /// Info about validators we are currently connected to. /// /// It keeps track to which validators we advertised our collation. -#[derive(Debug)] +#[derive(Debug, Default)] struct ValidatorGroup { - /// All [`ValidatorId`]'s of the current group to that we advertised our collation. - advertised_to: HashSet, + validators: Vec, + + advertised_to: HashMap>, } impl ValidatorGroup { - /// Create a new `ValidatorGroup` - /// - /// without any advertisements. - fn new() -> Self { - Self { advertised_to: HashSet::new() } - } - /// Returns `true` if we should advertise our collation to the given peer. 
fn should_advertise_to( &self, + candidate_hash: &CandidateHash, peer_ids: &HashMap>, peer: &PeerId, ) -> bool { - match peer_ids.get(peer) { - Some(discovery_ids) => !discovery_ids.iter().any(|d| self.advertised_to.contains(d)), - None => false, + let authority_ids = match peer_ids.get(peer) { + Some(authority_ids) => authority_ids, + None => return false, + }; + + for id in authority_ids { + // One peer id may correspond to different discovery ids across sessions, + // having a non-empty intersection is sufficient to assume that this peer + // belongs to this particular validator group. + let validator_index = match self.validators.iter().position(|v| v == id) { + Some(idx) => idx, + None => continue, + }; + + // Either the candidate is unseen by this validator group + // or the corresponding bit is not set. + if self + .advertised_to + .get(candidate_hash) + .map_or(true, |advertised| !advertised[validator_index]) + { + return true + } } + + false } /// Should be called after we advertised our collation to the given `peer` to keep track of it. fn advertised_to_peer( &mut self, + candidate_hash: &CandidateHash, peer_ids: &HashMap>, peer: &PeerId, ) { if let Some(authority_ids) = peer_ids.get(peer) { - authority_ids.iter().for_each(|a| { - self.advertised_to.insert(a.clone()); - }); + for id in authority_ids { + let validator_index = match self.validators.iter().position(|v| v == id) { + Some(idx) => idx, + None => continue, + }; + self.advertised_to + .entry(*candidate_hash) + .or_insert_with(|| bitvec![u8, Lsb0; 0; self.validators.len()]) + .set(validator_index, true); + } } } } @@ -325,6 +360,14 @@ where } } +struct PerRelayParent { + prospective_parachains_mode: ProspectiveParachainsMode, + + validator_group: ValidatorGroup, + + collations: arrayvec::ArrayVec<[Collation; MAX_CANDIDATE_DEPTH + 1]>, +} + struct State { /// Our network peer id. local_peer_id: PeerId, @@ -356,20 +399,14 @@ struct State { /// to asynchronous backing is done. 
active_leaves: HashMap, + per_relay_parent: HashMap, + /// Span per relay parent. span_per_relay_parent: HashMap, - /// Possessed collations. - /// - /// We will keep up to one local collation per relay-parent. - collations: HashMap, - /// The result senders per collation. collation_result_senders: HashMap>, - /// Our validator groups per active leaf. - our_validators_groups: HashMap, - /// The mapping from [`PeerId`] to [`HashSet`]. This is filled over time as we learn the [`PeerId`]'s /// by `PeerConnected` events. peer_ids: HashMap>, @@ -401,10 +438,9 @@ impl State { peer_views: Default::default(), implicit_view: Default::default(), active_leaves: Default::default(), + per_relay_parent: Default::default(), span_per_relay_parent: Default::default(), - collations: Default::default(), collation_result_senders: Default::default(), - our_validators_groups: Default::default(), peer_ids: Default::default(), waiting_collation_fetches: Default::default(), active_collation_fetches: Default::default(), @@ -456,34 +492,38 @@ async fn distribute_collation( let candidate_relay_parent = receipt.descriptor.relay_parent; let candidate_hash = receipt.hash(); - let relay_parent_mode = match state.active_leaves.get(&candidate_relay_parent) { - Some(mode) => *mode, + let per_relay_parent = match state.per_relay_parent.get_mut(&candidate_relay_parent) { + Some(per_relay_parent) => per_relay_parent, None => { - // If candidate relay parent is not an active leaf, assume - // it's part of implicit ancestry. 
- if state.active_leaves.keys().any(|block_hash| { - state - .implicit_view - .known_allowed_relay_parents_under(block_hash, Some(id)) - .unwrap_or_default() - .contains(&candidate_relay_parent) - }) { - ProspectiveParachainsMode::Enabled - } else { - gum::debug!( - target: LOG_TARGET, - para_id = %id, - candidate_relay_parent = %candidate_relay_parent, - candidate_hash = ?candidate_hash, - "Candidate relay parent is out of our view", - ); - return Ok(()) - } + gum::debug!( + target: LOG_TARGET, + para_id = %id, + candidate_relay_parent = %candidate_relay_parent, + candidate_hash = ?candidate_hash, + "Candidate relay parent is out of our view", + ); + return Ok(()) }, }; + let relay_parent_mode = per_relay_parent.prospective_parachains_mode; + + if per_relay_parent.collations.is_full() { + gum::debug!( + target: LOG_TARGET, + ?candidate_relay_parent, + "The limit of {} collations per relay parent is already reached", + MAX_CANDIDATE_DEPTH + 1, + ); + return Ok(()) + } // We have already seen collation for this relay parent. - if state.collations.contains_key(&candidate_relay_parent) { + if per_relay_parent + .collations + .iter() + .find(|&collation| collation.receipt.hash() == candidate_hash) + .is_some() + { gum::debug!( target: LOG_TARGET, ?candidate_relay_parent, @@ -537,21 +577,28 @@ async fn distribute_collation( "Accepted collation, connecting to validators." ); + let validators_at_relay_parent = &mut per_relay_parent.validator_group.validators; + if validators_at_relay_parent.is_empty() { + *validators_at_relay_parent = current_validators.validators.clone(); + } + // Issue a discovery request for the validators of the current group: + // + // TODO [now]: some kind of connection management is necessary to avoid + // dropping peers from e.g. implicit view assignments. 
connect_to_validators(ctx, current_validators.validators.into_iter().collect()).await; - state - .our_validators_groups - .insert(candidate_relay_parent, ValidatorGroup::new()); - if let Some(result_sender) = result_sender { state.collation_result_senders.insert(candidate_hash, result_sender); } - state.collations.insert( - candidate_relay_parent, - Collation { receipt, parent_head_data_hash, pov, status: CollationStatus::Created }, - ); + // safety: doesn't panic since we check for collations limit above. + per_relay_parent.collations.push(Collation { + receipt, + parent_head_data_hash, + pov, + status: CollationStatus::Created, + }); // It's collation-producer responsibility to verify that there exists // a hypothetical membership in a fragment tree for candidate. @@ -1102,6 +1149,14 @@ where let mode = prospective_parachains_mode(sender, *leaf).await?; state.active_leaves.insert(*leaf, mode); + state.per_relay_parent.insert( + *leaf, + PerRelayParent { + prospective_parachains_mode: mode, + validator_group: ValidatorGroup::default(), + collations: arrayvec::ArrayVec::new(), + }, + ); if mode.is_enabled() { state @@ -1109,6 +1164,18 @@ where .activate_leaf(sender, *leaf) .await .map_err(Error::ImplicitViewFetchError)?; + + let allowed_ancestry = state + .implicit_view + .known_allowed_relay_parents_under(leaf, state.collating_on) + .unwrap_or_default(); + for block_hash in allowed_ancestry { + state.per_relay_parent.entry(*block_hash).or_insert_with(|| PerRelayParent { + prospective_parachains_mode: ProspectiveParachainsMode::Enabled, + validator_group: ValidatorGroup::default(), + collations: arrayvec::ArrayVec::new(), + }); + } } } @@ -1124,7 +1191,13 @@ where for removed in &pruned { gum::debug!(target: LOG_TARGET, relay_parent = ?removed, "Removing relay parent because our view changed."); - if let Some(collation) = state.collations.remove(removed) { + + let collations = state + .per_relay_parent + .get_mut(removed) + .map(|per_relay_parent| 
std::mem::take(&mut per_relay_parent.collations)) + .unwrap_or_default(); + for collation in collations { state.collation_result_senders.remove(&collation.receipt.hash()); match collation.status { @@ -1148,7 +1221,7 @@ where ), } } - state.our_validators_groups.remove(removed); + state.per_relay_parent.remove(removed); state.span_per_relay_parent.remove(removed); state.waiting_collation_fetches.remove(removed); } From 2fb910f1f3327e569552bc49a180612f64c814f7 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Thu, 14 Jul 2022 17:18:41 +0300 Subject: [PATCH 03/45] Handle peer's view change --- Cargo.lock | 1 - node/network/collator-protocol/Cargo.toml | 1 - .../src/collator_side/mod.rs | 225 ++++++++++-------- 3 files changed, 126 insertions(+), 101 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1fb6fd157643..a0d0d2a765fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6360,7 +6360,6 @@ name = "polkadot-collator-protocol" version = "0.9.25" dependencies = [ "always-assert", - "arrayvec 0.5.2", "assert_matches", "bitvec 1.0.0", "env_logger", diff --git a/node/network/collator-protocol/Cargo.toml b/node/network/collator-protocol/Cargo.toml index fa3deb4c7d00..4074321618fe 100644 --- a/node/network/collator-protocol/Cargo.toml +++ b/node/network/collator-protocol/Cargo.toml @@ -10,7 +10,6 @@ futures = "0.3.21" futures-timer = "3" gum = { package = "tracing-gum", path = "../../gum" } bitvec = { version = "1.0.0", default-features = false, features = ["alloc"] } -arrayvec = "0.5.2" sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-runtime = { git = "https://github.com/paritytech/substrate", branch = "master" } diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index 9b0ceb7ee1b4..3e359d1b9f8b 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -196,8 +196,12 @@ impl 
metrics::Metrics for Metrics { /// It keeps track to which validators we advertised our collation. #[derive(Debug, Default)] struct ValidatorGroup { + /// Validators discovery ids. Lazily initialized when first + /// distributing a collation. validators: Vec, + /// Bits indicating which validators have already seen the announcement + /// per candidate. advertised_to: HashMap>, } @@ -362,10 +366,21 @@ where struct PerRelayParent { prospective_parachains_mode: ProspectiveParachainsMode, - + /// Validators group responsible for backing candidates built + /// on top of this relay parent. validator_group: ValidatorGroup, + /// Distributed collations. + collations: HashMap, +} - collations: arrayvec::ArrayVec<[Collation; MAX_CANDIDATE_DEPTH + 1]>, +impl PerRelayParent { + fn new(mode: ProspectiveParachainsMode) -> Self { + Self { + prospective_parachains_mode: mode, + validator_group: ValidatorGroup::default(), + collations: HashMap::new(), + } + } } struct State { @@ -399,6 +414,8 @@ struct State { /// to asynchronous backing is done. active_leaves: HashMap, + /// Validators and distributed collations tracked for each relay parent from + /// our view, including both leaves and implicit ancestry. per_relay_parent: HashMap, /// Span per relay parent. @@ -446,28 +463,6 @@ impl State { active_collation_fetches: Default::default(), } } - - /// Get all peers which have the given relay parent in their view. - fn peers_interested_in_relay_parent( - &self, - para_id: ParaId, - relay_parent: &Hash, - relay_parent_mode: ProspectiveParachainsMode, - ) -> Vec { - self.peer_views - .iter() - .filter(|(_, v)| match relay_parent_mode { - ProspectiveParachainsMode::Disabled => v.contains(relay_parent), - ProspectiveParachainsMode::Enabled => v.iter().any(|block_hash| { - self.implicit_view - .known_allowed_relay_parents_under(block_hash, Some(para_id)) - .unwrap_or_default() - .contains(relay_parent) - }), - }) - .map(|(peer, _)| *peer) - .collect() - } } /// Distribute a collation. 
@@ -507,23 +502,23 @@ async fn distribute_collation( }; let relay_parent_mode = per_relay_parent.prospective_parachains_mode; - if per_relay_parent.collations.is_full() { + let collations_limit = match relay_parent_mode { + ProspectiveParachainsMode::Disabled => 1, + ProspectiveParachainsMode::Enabled => MAX_CANDIDATE_DEPTH + 1, + }; + + if per_relay_parent.collations.len() >= collations_limit { gum::debug!( target: LOG_TARGET, ?candidate_relay_parent, "The limit of {} collations per relay parent is already reached", - MAX_CANDIDATE_DEPTH + 1, + collations_limit, ); return Ok(()) } // We have already seen collation for this relay parent. - if per_relay_parent - .collations - .iter() - .find(|&collation| collation.receipt.hash() == candidate_hash) - .is_some() - { + if per_relay_parent.collations.contains_key(&candidate_hash) { gum::debug!( target: LOG_TARGET, ?candidate_relay_parent, @@ -592,22 +587,40 @@ async fn distribute_collation( state.collation_result_senders.insert(candidate_hash, result_sender); } - // safety: doesn't panic since we check for collations limit above. - per_relay_parent.collations.push(Collation { - receipt, - parent_head_data_hash, - pov, - status: CollationStatus::Created, - }); + per_relay_parent.collations.insert( + candidate_hash, + Collation { receipt, parent_head_data_hash, pov, status: CollationStatus::Created }, + ); // It's collation-producer responsibility to verify that there exists // a hypothetical membership in a fragment tree for candidate. 
- let interested = - state.peers_interested_in_relay_parent(id, &candidate_relay_parent, relay_parent_mode); + let interested: Vec = state + .peer_views + .iter() + .filter(|(_, v)| match relay_parent_mode { + ProspectiveParachainsMode::Disabled => v.contains(&candidate_relay_parent), + ProspectiveParachainsMode::Enabled => v.iter().any(|block_hash| { + state + .implicit_view + .known_allowed_relay_parents_under(block_hash, Some(id)) + .unwrap_or_default() + .contains(&candidate_relay_parent) + }), + }) + .map(|(peer, _)| *peer) + .collect(); // Make sure already connected peers get collations: for peer_id in interested { - advertise_collation(ctx, state, candidate_relay_parent, peer_id).await; + advertise_collation( + ctx, + candidate_relay_parent, + per_relay_parent, + &peer_id, + &state.peer_ids, + &state.metrics, + ) + .await; } Ok(()) @@ -733,59 +746,51 @@ async fn connect_to_validators( #[overseer::contextbounds(CollatorProtocol, prefix = self::overseer)] async fn advertise_collation( ctx: &mut Context, - state: &mut State, relay_parent: Hash, - peer: PeerId, + per_relay_parent: &mut PerRelayParent, + peer: &PeerId, + peer_ids: &HashMap>, + metrics: &Metrics, ) { - let should_advertise = state - .our_validators_groups - .get(&relay_parent) - .map(|g| g.should_advertise_to(&state.peer_ids, &peer)) - .unwrap_or(false); - - match (state.collations.get_mut(&relay_parent), should_advertise) { - (None, _) => { - gum::trace!( - target: LOG_TARGET, - ?relay_parent, - peer_id = %peer, - "No collation to advertise.", - ); - return - }, - (_, false) => { - gum::debug!( - target: LOG_TARGET, - ?relay_parent, - peer_id = %peer, - "Not advertising collation as we already advertised it to this validator.", - ); - return - }, - (Some(collation), true) => { + for (candidate_hash, collation) in per_relay_parent.collations.iter_mut() { + let should_advertise = + per_relay_parent + .validator_group + .should_advertise_to(candidate_hash, peer_ids, &peer); + + if !should_advertise 
{ gum::debug!( target: LOG_TARGET, ?relay_parent, peer_id = %peer, - "Advertising collation.", + "Not advertising collation since validator is not interested", ); - collation.status.advance_to_advertised() - }, - } + continue + } - let wire_message = protocol_v1::CollatorProtocolMessage::AdvertiseCollation(relay_parent); + gum::debug!( + target: LOG_TARGET, + ?relay_parent, + peer_id = %peer, + "Advertising collation.", + ); + collation.status.advance_to_advertised(); - ctx.send_message(NetworkBridgeMessage::SendCollationMessage( - vec![peer.clone()], - Versioned::V1(protocol_v1::CollationProtocol::CollatorProtocol(wire_message)), - )) - .await; + // TODO [now]: versioned wire message. + let wire_message = protocol_v1::CollatorProtocolMessage::AdvertiseCollation(relay_parent); - if let Some(validators) = state.our_validators_groups.get_mut(&relay_parent) { - validators.advertised_to_peer(&state.peer_ids, &peer); - } + ctx.send_message(NetworkBridgeMessage::SendCollationMessage( + vec![peer.clone()], + Versioned::V1(protocol_v1::CollationProtocol::CollatorProtocol(wire_message)), + )) + .await; - state.metrics.on_advertisment_made(); + per_relay_parent + .validator_group + .advertised_to_peer(candidate_hash, &peer_ids, peer); + + metrics.on_advertisment_made(); + } } /// The main incoming message dispatching switch. @@ -1076,7 +1081,37 @@ async fn handle_peer_view_change( *current = view; for added in added.into_iter() { - advertise_collation(ctx, state, added, peer_id.clone()).await; + let block_hashes = match state + .per_relay_parent + .get(&added) + .map(|per_relay_parent| per_relay_parent.prospective_parachains_mode) + { + Some(ProspectiveParachainsMode::Disabled) => std::slice::from_ref(&added), + Some(ProspectiveParachainsMode::Enabled) => state + .implicit_view + .known_allowed_relay_parents_under(&added, state.collating_on) + .unwrap_or_default(), + None => { + // Added leaf is unknown. 
+ continue + }, + }; + + for block_hash in block_hashes { + let per_relay_parent = match state.per_relay_parent.get_mut(block_hash) { + Some(per_relay_parent) => per_relay_parent, + None => continue, + }; + advertise_collation( + ctx, + *block_hash, + per_relay_parent, + &peer_id, + &state.peer_ids, + &state.metrics, + ) + .await; + } } } @@ -1149,14 +1184,7 @@ where let mode = prospective_parachains_mode(sender, *leaf).await?; state.active_leaves.insert(*leaf, mode); - state.per_relay_parent.insert( - *leaf, - PerRelayParent { - prospective_parachains_mode: mode, - validator_group: ValidatorGroup::default(), - collations: arrayvec::ArrayVec::new(), - }, - ); + state.per_relay_parent.insert(*leaf, PerRelayParent::new(mode)); if mode.is_enabled() { state @@ -1170,11 +1198,10 @@ where .known_allowed_relay_parents_under(leaf, state.collating_on) .unwrap_or_default(); for block_hash in allowed_ancestry { - state.per_relay_parent.entry(*block_hash).or_insert_with(|| PerRelayParent { - prospective_parachains_mode: ProspectiveParachainsMode::Enabled, - validator_group: ValidatorGroup::default(), - collations: arrayvec::ArrayVec::new(), - }); + state + .per_relay_parent + .entry(*block_hash) + .or_insert_with(|| PerRelayParent::new(ProspectiveParachainsMode::Enabled)); } } } @@ -1197,7 +1224,7 @@ where .get_mut(removed) .map(|per_relay_parent| std::mem::take(&mut per_relay_parent.collations)) .unwrap_or_default(); - for collation in collations { + for collation in collations.into_values() { state.collation_result_senders.remove(&collation.receipt.hash()); match collation.status { From 7766a66f71f9b69d9aa5039ebc8152aef3758c39 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Fri, 15 Jul 2022 17:05:55 +0300 Subject: [PATCH 04/45] Versioning on advertising --- .../src/collator_side/mod.rs | 24 +- node/network/protocol/src/lib.rs | 240 ++++++++++++++++-- node/network/protocol/src/peer_set.rs | 56 +++- 3 files changed, 294 insertions(+), 26 deletions(-) diff --git 
a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index 3e359d1b9f8b..19724ee2d23f 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -34,7 +34,8 @@ use polkadot_node_network_protocol::{ v1::{self as request_v1, CollationFetchingRequest, CollationFetchingResponse}, IncomingRequest, IncomingRequestReceiver, }, - v1 as protocol_v1, OurView, PeerId, UnifiedReputationChange as Rep, Versioned, View, + v1 as protocol_v1, vstaging as protocol_vstaging, OurView, PeerId, + UnifiedReputationChange as Rep, Versioned, View, }; use polkadot_node_primitives::{CollationSecondedSignal, PoV, Statement}; use polkadot_node_subsystem::{ @@ -776,12 +777,27 @@ async fn advertise_collation( ); collation.status.advance_to_advertised(); - // TODO [now]: versioned wire message. - let wire_message = protocol_v1::CollatorProtocolMessage::AdvertiseCollation(relay_parent); + let collation_message = match per_relay_parent.prospective_parachains_mode { + ProspectiveParachainsMode::Enabled => { + let wire_message = protocol_vstaging::CollatorProtocolMessage::AdvertiseCollation { + relay_parent, + candidate_hash: *candidate_hash, + parent_head_data_hash: collation.parent_head_data_hash, + }; + Versioned::VStaging(protocol_vstaging::CollationProtocol::CollatorProtocol( + wire_message, + )) + }, + ProspectiveParachainsMode::Disabled => { + let wire_message = + protocol_v1::CollatorProtocolMessage::AdvertiseCollation(relay_parent); + Versioned::V1(protocol_v1::CollationProtocol::CollatorProtocol(wire_message)) + }, + }; ctx.send_message(NetworkBridgeMessage::SendCollationMessage( vec![peer.clone()], - Versioned::V1(protocol_v1::CollationProtocol::CollatorProtocol(wire_message)), + collation_message, )) .await; diff --git a/node/network/protocol/src/lib.rs b/node/network/protocol/src/lib.rs index 78727ae67e83..160cb6fc3c48 100644 --- 
a/node/network/protocol/src/lib.rs +++ b/node/network/protocol/src/lib.rs @@ -253,22 +253,26 @@ impl View { /// A protocol-versioned type. #[derive(Debug, Clone, PartialEq, Eq)] -pub enum Versioned { +pub enum Versioned { /// V1 type. V1(V1), + /// VStaging type. + VStaging(VStaging), } -impl Versioned<&'_ V1> { +impl Versioned<&'_ V1, &'_ VStaging> { /// Convert to a fully-owned version of the message. - pub fn clone_inner(&self) -> Versioned { + pub fn clone_inner(&self) -> Versioned { match *self { Versioned::V1(inner) => Versioned::V1(inner.clone()), + Versioned::VStaging(inner) => Versioned::VStaging(inner.clone()), } } } /// All supported versions of the validation protocol message. -pub type VersionedValidationProtocol = Versioned; +pub type VersionedValidationProtocol = + Versioned; impl From for VersionedValidationProtocol { fn from(v1: v1::ValidationProtocol) -> Self { @@ -276,8 +280,14 @@ impl From for VersionedValidationProtocol { } } +impl From for VersionedValidationProtocol { + fn from(vstaging: vstaging::ValidationProtocol) -> Self { + VersionedValidationProtocol::VStaging(vstaging) + } +} + /// All supported versions of the collation protocol message. -pub type VersionedCollationProtocol = Versioned; +pub type VersionedCollationProtocol = Versioned; impl From for VersionedCollationProtocol { fn from(v1: v1::CollationProtocol) -> Self { @@ -285,12 +295,19 @@ impl From for VersionedCollationProtocol { } } +impl From for VersionedCollationProtocol { + fn from(vstaging: vstaging::CollationProtocol) -> Self { + VersionedCollationProtocol::VStaging(vstaging) + } +} + macro_rules! impl_versioned_full_protocol_from { ($from:ty, $out:ty, $variant:ident) => { impl From<$from> for $out { fn from(versioned_from: $from) -> $out { match versioned_from { Versioned::V1(x) => Versioned::V1(x.into()), + Versioned::VStaging(x) => Versioned::VStaging(x.into()), } } } @@ -300,7 +317,12 @@ macro_rules! 
impl_versioned_full_protocol_from { /// Implement `TryFrom` for one versioned enum variant into the inner type. /// `$m_ty::$variant(inner) -> Ok(inner)` macro_rules! impl_versioned_try_from { - ($from:ty, $out:ty, $v1_pat:pat => $v1_out:expr) => { + ( + $from:ty, + $out:ty, + $v1_pat:pat => $v1_out:expr, + $vstaging_pat:pat => $vstaging_out:expr + ) => { impl TryFrom<$from> for $out { type Error = crate::WrongVariant; @@ -308,6 +330,7 @@ macro_rules! impl_versioned_try_from { #[allow(unreachable_patterns)] // when there is only one variant match x { Versioned::V1($v1_pat) => Ok(Versioned::V1($v1_out)), + Versioned::VStaging($vstaging_pat) => Ok(Versioned::VStaging($vstaging_out)), _ => Err(crate::WrongVariant), } } @@ -320,6 +343,8 @@ macro_rules! impl_versioned_try_from { #[allow(unreachable_patterns)] // when there is only one variant match x { Versioned::V1($v1_pat) => Ok(Versioned::V1($v1_out.clone())), + Versioned::VStaging($vstaging_pat) => + Ok(Versioned::VStaging($vstaging_out.clone())), _ => Err(crate::WrongVariant), } } @@ -328,7 +353,8 @@ macro_rules! impl_versioned_try_from { } /// Version-annotated messages used by the bitfield distribution subsystem. -pub type BitfieldDistributionMessage = Versioned; +pub type BitfieldDistributionMessage = + Versioned; impl_versioned_full_protocol_from!( BitfieldDistributionMessage, VersionedValidationProtocol, @@ -337,11 +363,13 @@ impl_versioned_full_protocol_from!( impl_versioned_try_from!( VersionedValidationProtocol, BitfieldDistributionMessage, - v1::ValidationProtocol::BitfieldDistribution(x) => x + v1::ValidationProtocol::BitfieldDistribution(x) => x, + vstaging::ValidationProtocol::BitfieldDistribution(x) => x ); /// Version-annotated messages used by the statement distribution subsystem. 
-pub type StatementDistributionMessage = Versioned; +pub type StatementDistributionMessage = + Versioned; impl_versioned_full_protocol_from!( StatementDistributionMessage, VersionedValidationProtocol, @@ -350,11 +378,13 @@ impl_versioned_full_protocol_from!( impl_versioned_try_from!( VersionedValidationProtocol, StatementDistributionMessage, - v1::ValidationProtocol::StatementDistribution(x) => x + v1::ValidationProtocol::StatementDistribution(x) => x, + vstaging::ValidationProtocol::StatementDistribution(x) => x ); /// Version-annotated messages used by the approval distribution subsystem. -pub type ApprovalDistributionMessage = Versioned; +pub type ApprovalDistributionMessage = + Versioned; impl_versioned_full_protocol_from!( ApprovalDistributionMessage, VersionedValidationProtocol, @@ -363,11 +393,14 @@ impl_versioned_full_protocol_from!( impl_versioned_try_from!( VersionedValidationProtocol, ApprovalDistributionMessage, - v1::ValidationProtocol::ApprovalDistribution(x) => x + v1::ValidationProtocol::ApprovalDistribution(x) => x, + vstaging::ValidationProtocol::ApprovalDistribution(x) => x + ); /// Version-annotated messages used by the gossip-support subsystem (this is void). -pub type GossipSupportNetworkMessage = Versioned; +pub type GossipSupportNetworkMessage = + Versioned; // This is a void enum placeholder, so never gets sent over the wire. impl TryFrom for GossipSupportNetworkMessage { type Error = WrongVariant; @@ -384,7 +417,8 @@ impl<'a> TryFrom<&'a VersionedValidationProtocol> for GossipSupportNetworkMessag } /// Version-annotated messages used by the bitfield distribution subsystem. 
-pub type CollatorProtocolMessage = Versioned; +pub type CollatorProtocolMessage = + Versioned; impl_versioned_full_protocol_from!( CollatorProtocolMessage, VersionedCollationProtocol, @@ -393,7 +427,8 @@ impl_versioned_full_protocol_from!( impl_versioned_try_from!( VersionedCollationProtocol, CollatorProtocolMessage, - v1::CollationProtocol::CollatorProtocol(x) => x + v1::CollationProtocol::CollatorProtocol(x) => x, + vstaging::CollationProtocol::CollatorProtocol(x) => x ); /// v1 notification protocol types. @@ -410,6 +445,11 @@ pub mod v1 { UncheckedSignedFullStatement, }; + use super::ProtocolVersion; + + /// The version of the v1 network protocol. + pub const VERSION: ProtocolVersion = 1; + /// Network messages used by the bitfield distribution subsystem. #[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)] pub enum BitfieldDistributionMessage { @@ -553,3 +593,173 @@ pub mod v1 { payload } } + +/// vstaging network protocol types. +pub mod vstaging { + use parity_scale_codec::{Decode, Encode}; + + use polkadot_primitives::vstaging::{ + CandidateHash, CandidateIndex, CollatorId, CollatorSignature, CompactStatement, Hash, + Id as ParaId, UncheckedSignedAvailabilityBitfield, ValidatorIndex, ValidatorSignature, + }; + + use polkadot_node_primitives::{ + approval::{IndirectAssignmentCert, IndirectSignedApprovalVote}, + UncheckedSignedFullStatement, + }; + + use super::ProtocolVersion; + + /// The version of the vstaging network protocol. + pub const VERSION: ProtocolVersion = 2; + + /// Network messages used by the bitfield distribution subsystem. + #[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)] + pub enum BitfieldDistributionMessage { + /// A signed availability bitfield for a given relay-parent hash. + #[codec(index = 0)] + Bitfield(Hash, UncheckedSignedAvailabilityBitfield), + } + + /// Network messages used by the statement distribution subsystem. 
+ #[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)] + pub enum StatementDistributionMessage { + /// A signed full statement under a given relay-parent. + #[codec(index = 0)] + Statement(Hash, UncheckedSignedFullStatement), + /// Seconded statement with large payload (e.g. containing a runtime upgrade). + /// + /// We only gossip the hash in that case, actual payloads can be fetched from sending node + /// via request/response. + #[codec(index = 1)] + LargeStatement(StatementMetadata), + } + + /// Data that makes a statement unique. + #[derive(Debug, Clone, Encode, Decode, PartialEq, Eq, Hash)] + pub struct StatementMetadata { + /// Relay parent this statement is relevant under. + pub relay_parent: Hash, + /// Hash of the candidate that got validated. + pub candidate_hash: CandidateHash, + /// Validator that attested the validity. + pub signed_by: ValidatorIndex, + /// Signature of seconding validator. + pub signature: ValidatorSignature, + } + + impl StatementDistributionMessage { + /// Get fingerprint describing the contained statement uniquely. + pub fn get_fingerprint(&self) -> (CompactStatement, ValidatorIndex) { + match self { + Self::Statement(_, statement) => ( + statement.unchecked_payload().to_compact(), + statement.unchecked_validator_index(), + ), + Self::LargeStatement(meta) => + (CompactStatement::Seconded(meta.candidate_hash), meta.signed_by), + } + } + + /// Get the signature from the statement. + pub fn get_signature(&self) -> ValidatorSignature { + match self { + Self::Statement(_, statement) => statement.unchecked_signature().clone(), + Self::LargeStatement(metadata) => metadata.signature.clone(), + } + } + + /// Get contained relay parent. + pub fn get_relay_parent(&self) -> Hash { + match self { + Self::Statement(r, _) => *r, + Self::LargeStatement(meta) => meta.relay_parent, + } + } + + /// Whether this message contains a large statement. 
+ pub fn is_large_statement(&self) -> bool { + if let Self::LargeStatement(_) = self { + true + } else { + false + } + } + } + + /// Network messages used by the approval distribution subsystem. + #[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)] + pub enum ApprovalDistributionMessage { + /// Assignments for candidates in recent, unfinalized blocks. + /// + /// Actually checking the assignment may yield a different result. + #[codec(index = 0)] + Assignments(Vec<(IndirectAssignmentCert, CandidateIndex)>), + /// Approvals for candidates in some recent, unfinalized block. + #[codec(index = 1)] + Approvals(Vec), + } + + /// Dummy network message type, so we will receive connect/disconnect events. + #[derive(Debug, Clone, PartialEq, Eq)] + pub enum GossipSupportNetworkMessage {} + + /// Network messages used by the collator protocol subsystem + #[derive(Debug, Clone, Encode, Decode, PartialEq, Eq)] + pub enum CollatorProtocolMessage { + /// Declare the intent to advertise collations under a collator ID, attaching a + /// signature of the `PeerId` of the node using the given collator ID key. + #[codec(index = 0)] + Declare(CollatorId, ParaId, CollatorSignature), + /// Advertise a collation to a validator. Can only be sent once the peer has + /// declared that they are a collator with given ID. + #[codec(index = 1)] + AdvertiseCollation { + /// Hash of the relay parent advertised collation is based on. + relay_parent: Hash, + /// Candidate hash. + candidate_hash: CandidateHash, + /// Parachain head data hash before candidate execution. + parent_head_data_hash: Hash, + }, + /// A collation sent to a validator was seconded. + #[codec(index = 4)] + CollationSeconded(Hash, UncheckedSignedFullStatement), + } + + /// All network messages on the validation peer-set. 
+ #[derive(Debug, Clone, Encode, Decode, PartialEq, Eq, derive_more::From)] + pub enum ValidationProtocol { + /// Bitfield distribution messages + #[codec(index = 1)] + #[from] + BitfieldDistribution(BitfieldDistributionMessage), + /// Statement distribution messages + #[codec(index = 3)] + #[from] + StatementDistribution(StatementDistributionMessage), + /// Approval distribution messages + #[codec(index = 4)] + #[from] + ApprovalDistribution(ApprovalDistributionMessage), + } + + /// All network messages on the collation peer-set. + #[derive(Debug, Clone, Encode, Decode, PartialEq, Eq, derive_more::From)] + pub enum CollationProtocol { + /// Collator protocol messages + #[codec(index = 0)] + #[from] + CollatorProtocol(CollatorProtocolMessage), + } + + /// Get the payload that should be signed and included in a `Declare` message. + /// + /// The payload is the local peer id of the node, which serves to prove that it + /// controls the collator key it is declaring an intention to collate under. + pub fn declare_signature_payload(peer_id: &sc_network::PeerId) -> Vec { + let mut payload = peer_id.to_bytes(); + payload.extend_from_slice(b"COLL"); + payload + } +} diff --git a/node/network/protocol/src/peer_set.rs b/node/network/protocol/src/peer_set.rs index 400b36e3d4c5..4a13fb766554 100644 --- a/node/network/protocol/src/peer_set.rs +++ b/node/network/protocol/src/peer_set.rs @@ -16,7 +16,7 @@ //! All peersets and protocols used for parachains. 
-use super::ProtocolVersion; +use super::{v1 as protocol_v1, vstaging as protocol_vstaging, ProtocolVersion}; use sc_network::config::{NonDefaultSetConfig, SetConfig}; use std::{ borrow::Cow, @@ -28,11 +28,24 @@ use strum::{EnumIter, IntoEnumIterator}; const VALIDATION_PROTOCOL_V1: &str = "/polkadot/validation/1"; const COLLATION_PROTOCOL_V1: &str = "/polkadot/collation/1"; +const VALIDATION_PROTOCOL_VSTAGING: &str = "/polkadot/validation/2"; +const COLLATION_PROTOCOL_VSTAGING: &str = "/polkadot/collation/2"; + /// The default validation protocol version. -pub const DEFAULT_VALIDATION_PROTOCOL_VERSION: ProtocolVersion = 1; +pub const DEFAULT_VALIDATION_PROTOCOL_VERSION: ProtocolVersion = + if cfg!(feature = "network-protocol-staging") { + protocol_vstaging::VERSION + } else { + protocol_v1::VERSION + }; /// The default collation protocol version. -pub const DEFAULT_COLLATION_PROTOCOL_VERSION: ProtocolVersion = 1; +pub const DEFAULT_COLLATION_PROTOCOL_VERSION: ProtocolVersion = + if cfg!(feature = "network-protocol-staging") { + protocol_vstaging::VERSION + } else { + protocol_v1::VERSION + }; /// The peer-sets and thus the protocols which are used for the network. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, EnumIter)] @@ -70,7 +83,11 @@ impl PeerSet { match self { PeerSet::Validation => NonDefaultSetConfig { notifications_protocol: protocol, - fallback_names: Vec::new(), + fallback_names: if cfg!(feature = "network-protocol-staging") { + vec![VALIDATION_PROTOCOL_V1.into()] + } else { + Vec::new() + }, max_notification_size, set_config: sc_network::config::SetConfig { // we allow full nodes to connect to validators for gossip @@ -85,7 +102,11 @@ impl PeerSet { }, PeerSet::Collation => NonDefaultSetConfig { notifications_protocol: protocol, - fallback_names: Vec::new(), + fallback_names: if cfg!(feature = "network-protocol-staging") { + vec![COLLATION_PROTOCOL_V1.into()] + } else { + Vec::new() + }, max_notification_size, set_config: SetConfig { // Non-authority nodes don't need to accept incoming connections on this peer set: @@ -112,18 +133,31 @@ impl PeerSet { /// Get the default protocol name as a static str. pub const fn get_default_protocol_name(self) -> &'static str { + #[cfg(not(feature = "network-protocol-staging"))] match self { PeerSet::Validation => VALIDATION_PROTOCOL_V1, PeerSet::Collation => COLLATION_PROTOCOL_V1, } + + #[cfg(feature = "network-protocol-staging")] + match self { + PeerSet::Validation => VALIDATION_PROTOCOL_VSTAGING, + PeerSet::Collation => COLLATION_PROTOCOL_VSTAGING, + } } /// Get the protocol name associated with each peer set /// and the given version, if any, as static str. pub const fn get_protocol_name_static(self, version: ProtocolVersion) -> Option<&'static str> { match (self, version) { + // v1 (PeerSet::Validation, 1) => Some(VALIDATION_PROTOCOL_V1), (PeerSet::Collation, 1) => Some(COLLATION_PROTOCOL_V1), + + // vstaging + (PeerSet::Validation, 2) => Some(VALIDATION_PROTOCOL_VSTAGING), + (PeerSet::Collation, 2) => Some(COLLATION_PROTOCOL_VSTAGING), + _ => None, } } @@ -144,8 +178,16 @@ impl PeerSet { /// This only succeeds on supported versions. 
pub fn try_from_protocol_name(name: &Cow<'static, str>) -> Option<(PeerSet, ProtocolVersion)> { match name { - n if n == VALIDATION_PROTOCOL_V1 => Some((PeerSet::Validation, 1)), - n if n == COLLATION_PROTOCOL_V1 => Some((PeerSet::Collation, 1)), + // v1 + n if n == VALIDATION_PROTOCOL_V1 => Some((PeerSet::Validation, protocol_v1::VERSION)), + n if n == COLLATION_PROTOCOL_V1 => Some((PeerSet::Collation, protocol_v1::VERSION)), + + // vstaging + n if n == VALIDATION_PROTOCOL_VSTAGING => + Some((PeerSet::Validation, protocol_vstaging::VERSION)), + n if n == COLLATION_PROTOCOL_VSTAGING => + Some((PeerSet::Collation, protocol_vstaging::VERSION)), + _ => None, } } From 5b698cae44d639d5efc88947e88ba0bbcbaa5610 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Sat, 16 Jul 2022 23:17:23 +0300 Subject: [PATCH 05/45] Versioned collation fetching request --- .../src/collator_side/collation.rs | 134 ++++++++ .../src/collator_side/metrics.rs | 123 +++++++ .../src/collator_side/mod.rs | 323 ++++++------------ .../src/collator_side/tests.rs | 10 +- node/network/protocol/src/lib.rs | 3 +- .../protocol/src/request_response/mod.rs | 25 +- .../protocol/src/request_response/outgoing.rs | 7 +- .../protocol/src/request_response/vstaging.rs | 37 ++ 8 files changed, 434 insertions(+), 228 deletions(-) create mode 100644 node/network/collator-protocol/src/collator_side/collation.rs create mode 100644 node/network/collator-protocol/src/collator_side/metrics.rs create mode 100644 node/network/protocol/src/request_response/vstaging.rs diff --git a/node/network/collator-protocol/src/collator_side/collation.rs b/node/network/collator-protocol/src/collator_side/collation.rs new file mode 100644 index 000000000000..1f300de19dde --- /dev/null +++ b/node/network/collator-protocol/src/collator_side/collation.rs @@ -0,0 +1,134 @@ +// Copyright 2022 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. 
+ +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! Primitives for tracking collations-related data. + +use std::collections::{HashSet, VecDeque}; + +use futures::{future::BoxFuture, stream::FuturesUnordered}; + +use polkadot_node_network_protocol::{ + request_response::{ + incoming::OutgoingResponse, v1 as protocol_v1, vstaging as protocol_vstaging, + IncomingRequest, + }, + PeerId, +}; +use polkadot_node_primitives::PoV; +use polkadot_primitives::v2::{CandidateHash, CandidateReceipt, Hash, Id as ParaId}; + +/// The status of a collation as seen from the collator. +pub enum CollationStatus { + /// The collation was created, but we did not advertise it to any validator. + Created, + /// The collation was advertised to at least one validator. + Advertised, + /// The collation was requested by at least one validator. + Requested, +} + +impl CollationStatus { + /// Advance to the [`Self::Advertised`] status. + /// + /// This ensures that `self` isn't already [`Self::Requested`]. + pub fn advance_to_advertised(&mut self) { + if !matches!(self, Self::Requested) { + *self = Self::Advertised; + } + } + + /// Advance to the [`Self::Requested`] status. + pub fn advance_to_requested(&mut self) { + *self = Self::Requested; + } +} + +/// A collation built by the collator. 
+pub struct Collation { + pub receipt: CandidateReceipt, + pub parent_head_data_hash: Hash, + pub pov: PoV, + pub status: CollationStatus, +} + +/// Stores the state for waiting collation fetches. +#[derive(Default)] +pub struct WaitingCollationFetches { + /// Is there currently a collation getting fetched? + pub collation_fetch_active: bool, + /// The collation fetches waiting to be fulfilled. + pub waiting: VecDeque, + /// All peers that are waiting or actively uploading. + /// + /// We will not accept multiple requests from the same peer, otherwise our DoS protection of + /// moving on to the next peer after `MAX_UNSHARED_UPLOAD_TIME` would be pointless. + pub waiting_peers: HashSet<(PeerId, CandidateHash)>, +} + +pub enum VersionedCollationRequest { + V1(IncomingRequest), + VStaging(IncomingRequest), +} + +impl From> for VersionedCollationRequest { + fn from(req: IncomingRequest) -> Self { + Self::V1(req) + } +} + +impl From> + for VersionedCollationRequest +{ + fn from(req: IncomingRequest) -> Self { + Self::VStaging(req) + } +} + +impl VersionedCollationRequest { + pub fn para_id(&self) -> ParaId { + match self { + VersionedCollationRequest::V1(req) => req.payload.para_id, + VersionedCollationRequest::VStaging(req) => req.payload.para_id, + } + } + + pub fn relay_parent(&self) -> Hash { + match self { + VersionedCollationRequest::V1(req) => req.payload.relay_parent, + VersionedCollationRequest::VStaging(req) => req.payload.relay_parent, + } + } + + pub fn peer_id(&self) -> PeerId { + match self { + VersionedCollationRequest::V1(req) => req.peer, + VersionedCollationRequest::VStaging(req) => req.peer, + } + } + + pub fn send_outgoing_response( + self, + response: OutgoingResponse, + ) -> Result<(), ()> { + match self { + VersionedCollationRequest::V1(req) => req.send_outgoing_response(response), + VersionedCollationRequest::VStaging(req) => req.send_outgoing_response(response), + } + } +} + +pub type ActiveCollationFetches = + FuturesUnordered>; diff --git 
a/node/network/collator-protocol/src/collator_side/metrics.rs b/node/network/collator-protocol/src/collator_side/metrics.rs new file mode 100644 index 000000000000..99da64f13278 --- /dev/null +++ b/node/network/collator-protocol/src/collator_side/metrics.rs @@ -0,0 +1,123 @@ +// Copyright 2022 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use polkadot_node_subsystem_util::metrics::{self, prometheus}; + +#[derive(Clone, Default)] +pub struct Metrics(Option); + +impl Metrics { + pub fn on_advertisment_made(&self) { + if let Some(metrics) = &self.0 { + metrics.advertisements_made.inc(); + } + } + + pub fn on_collation_sent_requested(&self) { + if let Some(metrics) = &self.0 { + metrics.collations_send_requested.inc(); + } + } + + pub fn on_collation_sent(&self) { + if let Some(metrics) = &self.0 { + metrics.collations_sent.inc(); + } + } + + /// Provide a timer for `process_msg` which observes on drop. + pub fn time_process_msg(&self) -> Option { + self.0.as_ref().map(|metrics| metrics.process_msg.start_timer()) + } + + /// Provide a timer for `distribute_collation` which observes on drop. 
+ pub fn time_collation_distribution( + &self, + label: &'static str, + ) -> Option { + self.0.as_ref().map(|metrics| { + metrics.collation_distribution_time.with_label_values(&[label]).start_timer() + }) + } +} + +#[derive(Clone)] +struct MetricsInner { + advertisements_made: prometheus::Counter, + collations_sent: prometheus::Counter, + collations_send_requested: prometheus::Counter, + process_msg: prometheus::Histogram, + collation_distribution_time: prometheus::HistogramVec, +} + +impl metrics::Metrics for Metrics { + fn try_register( + registry: &prometheus::Registry, + ) -> std::result::Result { + let metrics = MetricsInner { + advertisements_made: prometheus::register( + prometheus::Counter::new( + "polkadot_parachain_collation_advertisements_made_total", + "A number of collation advertisements sent to validators.", + )?, + registry, + )?, + collations_send_requested: prometheus::register( + prometheus::Counter::new( + "polkadot_parachain_collations_sent_requested_total", + "A number of collations requested to be sent to validators.", + )?, + registry, + )?, + collations_sent: prometheus::register( + prometheus::Counter::new( + "polkadot_parachain_collations_sent_total", + "A number of collations sent to validators.", + )?, + registry, + )?, + process_msg: prometheus::register( + prometheus::Histogram::with_opts( + prometheus::HistogramOpts::new( + "polkadot_parachain_collator_protocol_collator_process_msg", + "Time spent within `collator_protocol_collator::process_msg`", + ) + .buckets(vec![ + 0.001, 0.002, 0.005, 0.01, 0.025, 0.05, 0.1, 0.15, 0.25, 0.35, 0.5, 0.75, + 1.0, + ]), + )?, + registry, + )?, + collation_distribution_time: prometheus::register( + prometheus::HistogramVec::new( + prometheus::HistogramOpts::new( + "polkadot_parachain_collator_protocol_collator_distribution_time", + "Time spent within `collator_protocol_collator::distribute_collation`", + ) + .buckets(vec![ + 0.001, 0.002, 0.005, 0.01, 0.025, 0.05, 0.1, 0.15, 0.25, 0.35, 0.5, 0.75, + 
1.0, + ]), + &["state"], + )?, + registry, + )?, + }; + + Ok(Metrics(Some(metrics))) + } +} diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index 19724ee2d23f..383f65bd6be6 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -15,15 +15,12 @@ // along with Polkadot. If not, see . use std::{ - collections::{HashMap, HashSet, VecDeque}, - pin::Pin, + collections::{HashMap, HashSet}, time::Duration, }; use bitvec::prelude::*; -use futures::{ - channel::oneshot, pin_mut, select, stream::FuturesUnordered, Future, FutureExt, StreamExt, -}; +use futures::{channel::oneshot, pin_mut, select, FutureExt, StreamExt}; use sp_core::Pair; use polkadot_node_network_protocol::{ @@ -31,8 +28,7 @@ use polkadot_node_network_protocol::{ peer_set::PeerSet, request_response::{ incoming::{self, OutgoingResponse}, - v1::{self as request_v1, CollationFetchingRequest, CollationFetchingResponse}, - IncomingRequest, IncomingRequestReceiver, + v1 as request_v1, vstaging as request_vstaging, IncomingRequestReceiver, }, v1 as protocol_v1, vstaging as protocol_vstaging, OurView, PeerId, UnifiedReputationChange as Rep, Versioned, View, @@ -48,7 +44,6 @@ use polkadot_node_subsystem::{ }; use polkadot_node_subsystem_util::{ backing_implicit_view::View as ImplicitView, - metrics::{self, prometheus}, runtime::{get_availability_cores, get_group_rotation_info, RuntimeInfo}, TimeoutExt, }; @@ -59,11 +54,19 @@ use polkadot_primitives::v2::{ use super::LOG_TARGET; use crate::error::{log_error, Error, FatalError, Result}; -use fatality::Split; +mod collation; +mod metrics; #[cfg(test)] mod tests; +use collation::{ + ActiveCollationFetches, Collation, VersionedCollationRequest, CollationStatus, + WaitingCollationFetches, +}; + +pub use metrics::Metrics; + const COST_INVALID_REQUEST: Rep = Rep::CostMajor("Peer sent unparsable request"); const 
COST_UNEXPECTED_MESSAGE: Rep = Rep::CostMinor("An unexpected message"); const COST_APPARENT_FLOOD: Rep = @@ -86,112 +89,6 @@ const MAX_UNSHARED_UPLOAD_TIME: Duration = Duration::from_millis(150); /// we accept and distribute per relay parent. const MAX_CANDIDATE_DEPTH: usize = 4; -#[derive(Clone, Default)] -pub struct Metrics(Option); - -impl Metrics { - fn on_advertisment_made(&self) { - if let Some(metrics) = &self.0 { - metrics.advertisements_made.inc(); - } - } - - fn on_collation_sent_requested(&self) { - if let Some(metrics) = &self.0 { - metrics.collations_send_requested.inc(); - } - } - - fn on_collation_sent(&self) { - if let Some(metrics) = &self.0 { - metrics.collations_sent.inc(); - } - } - - /// Provide a timer for `process_msg` which observes on drop. - fn time_process_msg(&self) -> Option { - self.0.as_ref().map(|metrics| metrics.process_msg.start_timer()) - } - - /// Provide a timer for `distribute_collation` which observes on drop. - fn time_collation_distribution( - &self, - label: &'static str, - ) -> Option { - self.0.as_ref().map(|metrics| { - metrics.collation_distribution_time.with_label_values(&[label]).start_timer() - }) - } -} - -#[derive(Clone)] -struct MetricsInner { - advertisements_made: prometheus::Counter, - collations_sent: prometheus::Counter, - collations_send_requested: prometheus::Counter, - process_msg: prometheus::Histogram, - collation_distribution_time: prometheus::HistogramVec, -} - -impl metrics::Metrics for Metrics { - fn try_register( - registry: &prometheus::Registry, - ) -> std::result::Result { - let metrics = MetricsInner { - advertisements_made: prometheus::register( - prometheus::Counter::new( - "polkadot_parachain_collation_advertisements_made_total", - "A number of collation advertisements sent to validators.", - )?, - registry, - )?, - collations_send_requested: prometheus::register( - prometheus::Counter::new( - "polkadot_parachain_collations_sent_requested_total", - "A number of collations requested to be sent 
to validators.", - )?, - registry, - )?, - collations_sent: prometheus::register( - prometheus::Counter::new( - "polkadot_parachain_collations_sent_total", - "A number of collations sent to validators.", - )?, - registry, - )?, - process_msg: prometheus::register( - prometheus::Histogram::with_opts( - prometheus::HistogramOpts::new( - "polkadot_parachain_collator_protocol_collator_process_msg", - "Time spent within `collator_protocol_collator::process_msg`", - ) - .buckets(vec![ - 0.001, 0.002, 0.005, 0.01, 0.025, 0.05, 0.1, 0.15, 0.25, 0.35, 0.5, 0.75, - 1.0, - ]), - )?, - registry, - )?, - collation_distribution_time: prometheus::register( - prometheus::HistogramVec::new( - prometheus::HistogramOpts::new( - "polkadot_parachain_collator_protocol_collator_distribution_time", - "Time spent within `collator_protocol_collator::distribute_collation`", - ) - .buckets(vec![ - 0.001, 0.002, 0.005, 0.01, 0.025, 0.05, 0.1, 0.15, 0.25, 0.35, 0.5, 0.75, - 1.0, - ]), - &["state"], - )?, - registry, - )?, - }; - - Ok(Metrics(Some(metrics))) - } -} - /// Info about validators we are currently connected to. /// /// It keeps track to which validators we advertised our collation. @@ -264,57 +161,6 @@ impl ValidatorGroup { } } -/// The status of a collation as seen from the collator. -enum CollationStatus { - /// The collation was created, but we did not advertise it to any validator. - Created, - /// The collation was advertised to at least one validator. - Advertised, - /// The collation was requested by at least one validator. - Requested, -} - -impl CollationStatus { - /// Advance to the [`Self::Advertised`] status. - /// - /// This ensures that `self` isn't already [`Self::Requested`]. - fn advance_to_advertised(&mut self) { - if !matches!(self, Self::Requested) { - *self = Self::Advertised; - } - } - - /// Advance to the [`Self::Requested`] status. - fn advance_to_requested(&mut self) { - *self = Self::Requested; - } -} - -/// A collation built by the collator. 
-struct Collation { - receipt: CandidateReceipt, - parent_head_data_hash: Hash, - pov: PoV, - status: CollationStatus, -} - -/// Stores the state for waiting collation fetches. -#[derive(Default)] -struct WaitingCollationFetches { - /// Is there currently a collation getting fetched? - collation_fetch_active: bool, - /// The collation fetches waiting to be fulfilled. - waiting: VecDeque>, - /// All peers that are waiting or actively uploading. - /// - /// We will not accept multiple requests from the same peer, otherwise our DoS protection of - /// moving on to the next peer after `MAX_UNSHARED_UPLOAD_TIME` would be pointless. - waiting_peers: HashSet, -} - -type ActiveCollationFetches = - FuturesUnordered + Send + 'static>>>; - #[derive(Debug, Clone, Copy, PartialEq)] enum ProspectiveParachainsMode { // v2 runtime API: no prospective parachains. @@ -891,17 +737,18 @@ async fn process_msg( /// Issue a response to a previously requested collation. async fn send_collation( state: &mut State, - request: IncomingRequest, + request: VersionedCollationRequest, receipt: CandidateReceipt, pov: PoV, ) { let (tx, rx) = oneshot::channel(); - let relay_parent = request.payload.relay_parent; - let peer_id = request.peer; + let relay_parent = request.relay_parent(); + let peer_id = request.peer_id(); + let candidate_hash = receipt.hash(); let response = OutgoingResponse { - result: Ok(CollationFetchingResponse::Collation(receipt, pov)), + result: Ok(request_v1::CollationFetchingResponse::Collation(receipt, pov)), reputation_changes: Vec::new(), sent_feedback: Some(tx), }; @@ -921,7 +768,7 @@ async fn send_collation( "Sending collation to validator timed out, carrying on with next validator." 
); } - (relay_parent, peer_id) + (relay_parent, candidate_hash, peer_id) } .boxed(), ); @@ -1014,42 +861,75 @@ async fn handle_incoming_peer_message( async fn handle_incoming_request( ctx: &mut Context, state: &mut State, - req: IncomingRequest, + req: std::result::Result, ) -> Result<()> { + let req = req?; + let relay_parent = req.relay_parent(); + let peer_id = req.peer_id(); + let para_id = req.para_id(); + let _span = state .span_per_relay_parent - .get(&req.payload.relay_parent) + .get(&relay_parent) .map(|s| s.child("request-collation")); match state.collating_on { - Some(our_para_id) if our_para_id == req.payload.para_id => { - let (receipt, pov) = - if let Some(collation) = state.collations.get_mut(&req.payload.relay_parent) { - collation.status.advance_to_requested(); - (collation.receipt.clone(), collation.pov.clone()) - } else { + Some(our_para_id) if our_para_id == para_id => { + let per_relay_parent = match state.per_relay_parent.get_mut(&relay_parent) { + Some(per_relay_parent) => per_relay_parent, + None => { + gum::debug!( + target: LOG_TARGET, + relay_parent = %relay_parent, + "received a `RequestCollation` for a relay parent out of our view", + ); + + return Ok(()) + }, + }; + + let collation = match (per_relay_parent.prospective_parachains_mode, &req) { + (ProspectiveParachainsMode::Disabled, VersionedCollationRequest::V1(_)) => + per_relay_parent.collations.values_mut().next(), + (ProspectiveParachainsMode::Enabled, VersionedCollationRequest::VStaging(req)) => + per_relay_parent.collations.get_mut(&req.payload.candidate_hash), + _ => { gum::warn!( target: LOG_TARGET, - relay_parent = %req.payload.relay_parent, - "received a `RequestCollation` for a relay parent we don't have collation stored.", + relay_parent = %relay_parent, + mode = ?per_relay_parent.prospective_parachains_mode, + "Collation request version is invalid", ); return Ok(()) - }; + }, + }; + let (receipt, pov) = if let Some(collation) = collation { + 
collation.status.advance_to_requested(); + (collation.receipt.clone(), collation.pov.clone()) + } else { + gum::warn!( + target: LOG_TARGET, + relay_parent = %relay_parent, + "received a `RequestCollation` for a relay parent we don't have collation stored.", + ); + + return Ok(()) + }; state.metrics.on_collation_sent_requested(); let _span = _span.as_ref().map(|s| s.child("sending")); - let waiting = - state.waiting_collation_fetches.entry(req.payload.relay_parent).or_default(); + let waiting = state.waiting_collation_fetches.entry(relay_parent).or_default(); + let candidate_hash = receipt.hash(); - if !waiting.waiting_peers.insert(req.peer) { + if !waiting.waiting_peers.insert((peer_id, candidate_hash)) { gum::debug!( target: LOG_TARGET, "Dropping incoming request as peer has a request in flight already." ); - ctx.send_message(NetworkBridgeMessage::ReportPeer(req.peer, COST_APPARENT_FLOOD)) + ctx.send_message(NetworkBridgeMessage::ReportPeer(peer_id, COST_APPARENT_FLOOD)) .await; return Ok(()) } @@ -1066,7 +946,7 @@ async fn handle_incoming_request( Some(our_para_id) => { gum::warn!( target: LOG_TARGET, - for_para_id = %req.payload.para_id, + for_para_id = %para_id, our_para_id = %our_para_id, "received a `CollationFetchingRequest` for unexpected para_id", ); @@ -1074,7 +954,7 @@ async fn handle_incoming_request( None => { gum::warn!( target: LOG_TARGET, - for_para_id = %req.payload.para_id, + for_para_id = %para_id, "received a `RequestCollation` while not collating on any para", ); }, @@ -1278,7 +1158,8 @@ pub(crate) async fn run( mut ctx: Context, local_peer_id: PeerId, collator_pair: CollatorPair, - mut req_receiver: IncomingRequestReceiver, + mut req_v1_receiver: IncomingRequestReceiver, + mut req_v2_receiver: IncomingRequestReceiver, metrics: Metrics, ) -> std::result::Result<(), FatalError> { use OverseerSignal::*; @@ -1287,8 +1168,12 @@ pub(crate) async fn run( let mut runtime = RuntimeInfo::new(None); loop { - let recv_req = req_receiver.recv(|| 
vec![COST_INVALID_REQUEST]).fuse(); - pin_mut!(recv_req); + let reputation_changes = || vec![COST_INVALID_REQUEST]; + let recv_req_v1 = req_v1_receiver.recv(reputation_changes).fuse(); + let recv_req_v2 = req_v2_receiver.recv(reputation_changes).fuse(); + pin_mut!(recv_req_v1); + pin_mut!(recv_req_v2); + select! { msg = ctx.recv().fuse() => match msg.map_err(FatalError::SubsystemReceive)? { FromOrchestra::Communication { msg } => { @@ -1301,9 +1186,9 @@ pub(crate) async fn run( FromOrchestra::Signal(BlockFinalized(..)) => {} FromOrchestra::Signal(Conclude) => return Ok(()), }, - (relay_parent, peer_id) = state.active_collation_fetches.select_next_some() => { + (relay_parent, candidate_hash, peer_id) = state.active_collation_fetches.select_next_some() => { let next = if let Some(waiting) = state.waiting_collation_fetches.get_mut(&relay_parent) { - waiting.waiting_peers.remove(&peer_id); + waiting.waiting_peers.remove(&(peer_id, candidate_hash)); if let Some(next) = waiting.waiting.pop_front() { next } else { @@ -1315,31 +1200,45 @@ pub(crate) async fn run( continue }; - if let Some(collation) = state.collations.get(&relay_parent) { + let next_collation = { + let per_relay_parent = match state.per_relay_parent.get(&relay_parent) { + Some(per_relay_parent) => per_relay_parent, + None => continue, + }; + + match (per_relay_parent.prospective_parachains_mode, &next) { + (ProspectiveParachainsMode::Disabled, VersionedCollationRequest::V1(_)) => { + per_relay_parent.collations.values().next() + }, + (ProspectiveParachainsMode::Enabled, VersionedCollationRequest::VStaging(req)) => { + per_relay_parent.collations.get(&req.payload.candidate_hash) + }, + _ => continue, + } + }; + + if let Some(collation) = next_collation { let receipt = collation.receipt.clone(); let pov = collation.pov.clone(); send_collation(&mut state, next, receipt, pov).await; } } - in_req = recv_req => { - match in_req { - Ok(req) => { - log_error( - handle_incoming_request(&mut ctx, &mut state, 
req).await, - "Handling incoming request" - )?; - } - Err(error) => { - let jfyi = error.split().map_err(incoming::Error::from)?; - gum::debug!( - target: LOG_TARGET, - error = ?jfyi, - "Decoding incoming request failed" - ); - continue - } - } + in_req = recv_req_v1 => { + let request = in_req.map(VersionedCollationRequest::from); + + log_error( + handle_incoming_request(&mut ctx, &mut state, request).await, + "Handling incoming request" + )?; + } + in_req = recv_req_v2 => { + let request = in_req.map(VersionedCollationRequest::from); + + log_error( + handle_incoming_request(&mut ctx, &mut state, request).await, + "Handling incoming request" + )?; } } } diff --git a/node/network/collator-protocol/src/collator_side/tests.rs b/node/network/collator-protocol/src/collator_side/tests.rs index 41cabb39a600..0c54353fd06d 100644 --- a/node/network/collator-protocol/src/collator_side/tests.rs +++ b/node/network/collator-protocol/src/collator_side/tests.rs @@ -540,7 +540,7 @@ fn advertise_and_send_collation() { .unwrap() .send(RawIncomingRequest { peer, - payload: CollationFetchingRequest { + payload: VersionedCollationRequest { relay_parent: test_state.relay_parent, para_id: test_state.para_id, } @@ -559,7 +559,7 @@ fn advertise_and_send_collation() { .unwrap() .send(RawIncomingRequest { peer, - payload: CollationFetchingRequest { + payload: VersionedCollationRequest { relay_parent: test_state.relay_parent, para_id: test_state.para_id, } @@ -609,7 +609,7 @@ fn advertise_and_send_collation() { .unwrap() .send(RawIncomingRequest { peer, - payload: CollationFetchingRequest { + payload: VersionedCollationRequest { relay_parent: old_relay_parent, para_id: test_state.para_id, } @@ -911,7 +911,7 @@ where .unwrap() .send(RawIncomingRequest { peer: validator_0, - payload: CollationFetchingRequest { + payload: VersionedCollationRequest { relay_parent: test_state.relay_parent, para_id: test_state.para_id, } @@ -946,7 +946,7 @@ where .unwrap() .send(RawIncomingRequest { peer: 
validator_1, - payload: CollationFetchingRequest { + payload: VersionedCollationRequest { relay_parent: test_state.relay_parent, para_id: test_state.para_id, } diff --git a/node/network/protocol/src/lib.rs b/node/network/protocol/src/lib.rs index 160cb6fc3c48..dff113a54d5f 100644 --- a/node/network/protocol/src/lib.rs +++ b/node/network/protocol/src/lib.rs @@ -19,7 +19,8 @@ #![deny(unused_crate_dependencies)] #![warn(missing_docs)] -use parity_scale_codec::{Decode, Encode}; +#[doc(hidden)] +pub use parity_scale_codec::{Decode, Encode}; use polkadot_primitives::v2::{BlockNumber, Hash}; use std::{collections::HashMap, fmt}; diff --git a/node/network/protocol/src/request_response/mod.rs b/node/network/protocol/src/request_response/mod.rs index 7a0f85eaeced..b36071717a34 100644 --- a/node/network/protocol/src/request_response/mod.rs +++ b/node/network/protocol/src/request_response/mod.rs @@ -55,6 +55,9 @@ pub use outgoing::{OutgoingRequest, OutgoingResult, Recipient, Requests, Respons /// Actual versioned requests and responses, that are sent over the wire. pub mod v1; +/// Staging requests to be sent over the wire. +pub mod vstaging; + /// A protocol per subsystem seems to make the most sense, this way we don't need any dispatching /// within protocols. #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, EnumIter)] @@ -63,6 +66,8 @@ pub enum Protocol { ChunkFetchingV1, /// Protocol for fetching collations from collators. CollationFetchingV1, + /// Protocol for fetching collations from collators when async backing is enabled. + CollationFetchingVStaging, /// Protocol for fetching seconded PoVs from validators of the same group. PoVFetchingV1, /// Protocol for fetching available data. 
@@ -138,14 +143,15 @@ impl Protocol { request_timeout: CHUNK_REQUEST_TIMEOUT, inbound_queue: Some(tx), }, - Protocol::CollationFetchingV1 => RequestResponseConfig { - name: p_name, - max_request_size: 1_000, - max_response_size: POV_RESPONSE_SIZE, - // Taken from initial implementation in collator protocol: - request_timeout: POV_REQUEST_TIMEOUT_CONNECTED, - inbound_queue: Some(tx), - }, + Protocol::CollationFetchingV1 | Protocol::CollationFetchingVStaging => + RequestResponseConfig { + name: p_name, + max_request_size: 1_000, + max_response_size: POV_RESPONSE_SIZE, + // Taken from initial implementation in collator protocol: + request_timeout: POV_REQUEST_TIMEOUT_CONNECTED, + inbound_queue: Some(tx), + }, Protocol::PoVFetchingV1 => RequestResponseConfig { name: p_name, max_request_size: 1_000, @@ -203,7 +209,7 @@ impl Protocol { // as well. Protocol::ChunkFetchingV1 => 100, // 10 seems reasonable, considering group sizes of max 10 validators. - Protocol::CollationFetchingV1 => 10, + Protocol::CollationFetchingV1 | Protocol::CollationFetchingVStaging => 10, // 10 seems reasonable, considering group sizes of max 10 validators. 
Protocol::PoVFetchingV1 => 10, // Validators are constantly self-selecting to request available data which may lead @@ -247,6 +253,7 @@ impl Protocol { match self { Protocol::ChunkFetchingV1 => "/polkadot/req_chunk/1", Protocol::CollationFetchingV1 => "/polkadot/req_collation/1", + Protocol::CollationFetchingVStaging => "/polkadot/req_collation/2", Protocol::PoVFetchingV1 => "/polkadot/req_pov/1", Protocol::AvailableDataFetchingV1 => "/polkadot/req_available_data/1", Protocol::StatementFetchingV1 => "/polkadot/req_statement/1", diff --git a/node/network/protocol/src/request_response/outgoing.rs b/node/network/protocol/src/request_response/outgoing.rs index a9353965a48f..c27ff3a80fe3 100644 --- a/node/network/protocol/src/request_response/outgoing.rs +++ b/node/network/protocol/src/request_response/outgoing.rs @@ -23,7 +23,7 @@ use sc_network::PeerId; use polkadot_primitives::v2::AuthorityDiscoveryId; -use super::{v1, IsRequest, Protocol}; +use super::{v1, vstaging, IsRequest, Protocol}; /// All requests that can be sent to the network bridge via `NetworkBridgeMessage::SendRequest`. #[derive(Debug)] @@ -32,6 +32,9 @@ pub enum Requests { ChunkFetchingV1(OutgoingRequest), /// Fetch a collation from a collator which previously announced it. CollationFetchingV1(OutgoingRequest), + /// Fetch a collation from a collator which previously announced it. + /// Compared to V1 it requires specifying which candidate is requested by its hash. + CollationFetchingVStaging(OutgoingRequest), /// Fetch a PoV from a validator which previously sent out a seconded statement. PoVFetchingV1(OutgoingRequest), /// Request full available data from a node. 
@@ -48,6 +51,7 @@ impl Requests { match self { Self::ChunkFetchingV1(_) => Protocol::ChunkFetchingV1, Self::CollationFetchingV1(_) => Protocol::CollationFetchingV1, + Self::CollationFetchingVStaging(_) => Protocol::CollationFetchingVStaging, Self::PoVFetchingV1(_) => Protocol::PoVFetchingV1, Self::AvailableDataFetchingV1(_) => Protocol::AvailableDataFetchingV1, Self::StatementFetchingV1(_) => Protocol::StatementFetchingV1, @@ -66,6 +70,7 @@ impl Requests { match self { Self::ChunkFetchingV1(r) => r.encode_request(), Self::CollationFetchingV1(r) => r.encode_request(), + Self::CollationFetchingVStaging(r) => r.encode_request(), Self::PoVFetchingV1(r) => r.encode_request(), Self::AvailableDataFetchingV1(r) => r.encode_request(), Self::StatementFetchingV1(r) => r.encode_request(), diff --git a/node/network/protocol/src/request_response/vstaging.rs b/node/network/protocol/src/request_response/vstaging.rs new file mode 100644 index 000000000000..058bdccdcb03 --- /dev/null +++ b/node/network/protocol/src/request_response/vstaging.rs @@ -0,0 +1,37 @@ +// Copyright 2022 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use parity_scale_codec::{Decode, Encode}; +use polkadot_primitives::v2::{CandidateHash, Hash, Id as ParaId}; + +use super::{IsRequest, Protocol}; + +/// Request the advertised collation at that relay-parent. 
+#[derive(Debug, Clone, Encode, Decode)] +pub struct CollationFetchingRequest { + /// Relay parent collation is built on top of. + pub relay_parent: Hash, + /// The `ParaId` of the collation. + pub para_id: ParaId, + /// Candidate hash. + pub candidate_hash: CandidateHash, +} + +impl IsRequest for CollationFetchingRequest { + // The response is the same as for V1. + type Response = super::v1::CollationFetchingResponse; + const PROTOCOL: Protocol = Protocol::CollationFetchingVStaging; +} From 5caae2fa5e7f8280689e67b1cf23b5f97220c863 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Mon, 18 Jul 2022 14:50:22 +0300 Subject: [PATCH 06/45] Handle versioned messages --- .../src/collator_side/mod.rs | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index 383f65bd6be6..46a7ccc67682 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -61,7 +61,7 @@ mod metrics; mod tests; use collation::{ - ActiveCollationFetches, Collation, VersionedCollationRequest, CollationStatus, + ActiveCollationFetches, Collation, CollationStatus, VersionedCollationRequest, WaitingCollationFetches, }; @@ -783,12 +783,16 @@ async fn handle_incoming_peer_message( runtime: &mut RuntimeInfo, state: &mut State, origin: PeerId, - msg: protocol_v1::CollatorProtocolMessage, + msg: Versioned< + protocol_v1::CollatorProtocolMessage, + protocol_vstaging::CollatorProtocolMessage, + >, ) -> Result<()> { - use protocol_v1::CollatorProtocolMessage::*; + use protocol_v1::CollatorProtocolMessage as V1; + use protocol_vstaging::CollatorProtocolMessage as VStaging; match msg { - Declare(_, _, _) => { + Versioned::V1(V1::Declare(..)) | Versioned::VStaging(VStaging::Declare(..)) => { gum::trace!( target: LOG_TARGET, ?origin, @@ -799,7 +803,8 @@ async fn handle_incoming_peer_message( 
ctx.send_message(NetworkBridgeMessage::DisconnectPeer(origin, PeerSet::Collation)) .await; }, - AdvertiseCollation(_) => { + Versioned::V1(V1::AdvertiseCollation(_)) | + Versioned::VStaging(VStaging::AdvertiseCollation { .. }) => { gum::trace!( target: LOG_TARGET, ?origin, @@ -816,7 +821,8 @@ async fn handle_incoming_peer_message( ctx.send_message(NetworkBridgeMessage::DisconnectPeer(origin, PeerSet::Collation)) .await; }, - CollationSeconded(relay_parent, statement) => { + Versioned::V1(V1::CollationSeconded(relay_parent, statement)) | + Versioned::VStaging(VStaging::CollationSeconded(relay_parent, statement)) => { if !matches!(statement.unchecked_payload(), Statement::Seconded(_)) { gum::warn!( target: LOG_TARGET, @@ -1051,7 +1057,7 @@ async fn handle_network_msg( gum::trace!(target: LOG_TARGET, ?view, "Own view change"); handle_our_view_change(ctx.sender(), state, view).await?; }, - PeerMessage(remote, Versioned::V1(msg)) => { + PeerMessage(remote, msg) => { handle_incoming_peer_message(ctx, runtime, state, remote, msg).await?; }, NewGossipTopology { .. } => { From 70b432e6ccc0e920a0ab460134a29c6bf08e9611 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Mon, 18 Jul 2022 15:00:25 +0300 Subject: [PATCH 07/45] Improve docs for collation requests --- .../collator-protocol/src/collator_side/collation.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/node/network/collator-protocol/src/collator_side/collation.rs b/node/network/collator-protocol/src/collator_side/collation.rs index 1f300de19dde..2b59f2aed000 100644 --- a/node/network/collator-protocol/src/collator_side/collation.rs +++ b/node/network/collator-protocol/src/collator_side/collation.rs @@ -64,10 +64,14 @@ pub struct Collation { pub status: CollationStatus, } -/// Stores the state for waiting collation fetches. +/// Stores the state for waiting collation fetches per relay parent. 
#[derive(Default)] pub struct WaitingCollationFetches { - /// Is there currently a collation getting fetched? + /// A flag indicating that we have an ongoing request. + /// This limits the number of collations being sent at any moment + /// of time to 1 for each relay parent. + /// + /// If set to `true`, any new request will be queued. pub collation_fetch_active: bool, /// The collation fetches waiting to be fulfilled. pub waiting: VecDeque, @@ -78,6 +82,7 @@ pub struct WaitingCollationFetches { pub waiting_peers: HashSet<(PeerId, CandidateHash)>, } +/// Backwards-compatible wrapper for incoming collations requests. pub enum VersionedCollationRequest { V1(IncomingRequest), VStaging(IncomingRequest), From 65280e09f9ebe5c50699a7d39a72253af5f1bcdc Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Mon, 18 Jul 2022 15:30:41 +0300 Subject: [PATCH 08/45] Add spans --- node/network/collator-protocol/src/collator_side/mod.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index 46a7ccc67682..9d6f5f819cf3 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -1085,6 +1085,11 @@ where for leaf in added { let mode = prospective_parachains_mode(sender, *leaf).await?; + if let Some(span) = view.span_per_head().get(leaf).cloned() { + let per_leaf_span = PerLeafSpan::new(span, "collator-side"); + state.span_per_relay_parent.insert(*leaf, per_leaf_span); + } + state.active_leaves.insert(*leaf, mode); state.per_relay_parent.insert(*leaf, PerRelayParent::new(mode)); From 96e18176f22500d5d8de30382b03726ab8808030 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Mon, 18 Jul 2022 17:10:41 +0300 Subject: [PATCH 09/45] Add request receiver to overseer --- node/network/collator-protocol/src/lib.rs | 34 +++++++++++++++++------ node/service/src/lib.rs | 7 +++-- node/service/src/overseer.rs | 
22 +++++++++------ 3 files changed, 44 insertions(+), 19 deletions(-) diff --git a/node/network/collator-protocol/src/lib.rs b/node/network/collator-protocol/src/lib.rs index bdf8904b7d07..d59091be4378 100644 --- a/node/network/collator-protocol/src/lib.rs +++ b/node/network/collator-protocol/src/lib.rs @@ -28,7 +28,7 @@ use futures::{FutureExt, TryFutureExt}; use sp_keystore::SyncCryptoStorePtr; use polkadot_node_network_protocol::{ - request_response::{v1 as request_v1, IncomingRequestReceiver}, + request_response::{v1 as request_v1, vstaging as protocol_vstaging, IncomingRequestReceiver}, PeerId, UnifiedReputationChange as Rep, }; use polkadot_primitives::v2::CollatorPair; @@ -74,12 +74,14 @@ pub enum ProtocolSide { metrics: validator_side::Metrics, }, /// Collators operate on a parachain. - Collator( - PeerId, - CollatorPair, - IncomingRequestReceiver, - collator_side::Metrics, - ), + Collator { + peer_id: PeerId, + collator_pair: CollatorPair, + request_receiver_v1: IncomingRequestReceiver, + request_receiver_vstaging: + IncomingRequestReceiver, + metrics: collator_side::Metrics, + }, } /// The collator protocol subsystem. 
@@ -101,8 +103,22 @@ impl CollatorProtocolSubsystem { match self.protocol_side { ProtocolSide::Validator { keystore, eviction_policy, metrics } => validator_side::run(ctx, keystore, eviction_policy, metrics).await, - ProtocolSide::Collator(local_peer_id, collator_pair, req_receiver, metrics) => - collator_side::run(ctx, local_peer_id, collator_pair, req_receiver, metrics).await, + ProtocolSide::Collator { + peer_id, + collator_pair, + request_receiver_v1, + request_receiver_vstaging, + metrics, + } => + collator_side::run( + ctx, + peer_id, + collator_pair, + request_receiver_v1, + request_receiver_vstaging, + metrics, + ) + .await, } } } diff --git a/node/service/src/lib.rs b/node/service/src/lib.rs index 95b613d998f0..f4afe1eca57c 100644 --- a/node/service/src/lib.rs +++ b/node/service/src/lib.rs @@ -852,7 +852,9 @@ where config.network.request_response_protocols.push(cfg); let (chunk_req_receiver, cfg) = IncomingRequest::get_config_receiver(); config.network.request_response_protocols.push(cfg); - let (collation_req_receiver, cfg) = IncomingRequest::get_config_receiver(); + let (collation_req_v1_receiver, cfg) = IncomingRequest::get_config_receiver(); + config.network.request_response_protocols.push(cfg); + let (collation_req_vstaging_receiver, cfg) = IncomingRequest::get_config_receiver(); config.network.request_response_protocols.push(cfg); let (available_data_req_receiver, cfg) = IncomingRequest::get_config_receiver(); config.network.request_response_protocols.push(cfg); @@ -1033,7 +1035,8 @@ where authority_discovery_service, pov_req_receiver, chunk_req_receiver, - collation_req_receiver, + collation_req_v1_receiver, + collation_req_vstaging_receiver, available_data_req_receiver, statement_req_receiver, dispute_req_receiver, diff --git a/node/service/src/overseer.rs b/node/service/src/overseer.rs index 527b99cdc1ce..84f1599d1bc3 100644 --- a/node/service/src/overseer.rs +++ b/node/service/src/overseer.rs @@ -24,7 +24,9 @@ use 
polkadot_node_core_av_store::Config as AvailabilityConfig; use polkadot_node_core_candidate_validation::Config as CandidateValidationConfig; use polkadot_node_core_chain_selection::Config as ChainSelectionConfig; use polkadot_node_core_dispute_coordinator::Config as DisputeCoordinatorConfig; -use polkadot_node_network_protocol::request_response::{v1 as request_v1, IncomingRequestReceiver}; +use polkadot_node_network_protocol::request_response::{ + v1 as request_v1, vstaging as request_vstaging, IncomingRequestReceiver, +}; #[cfg(any(feature = "malus", test))] pub use polkadot_overseer::{ dummy::{dummy_overseer_builder, DummySubsystem}, @@ -89,7 +91,9 @@ where /// POV request receiver pub pov_req_receiver: IncomingRequestReceiver, pub chunk_req_receiver: IncomingRequestReceiver, - pub collation_req_receiver: IncomingRequestReceiver, + pub collation_req_v1_receiver: IncomingRequestReceiver, + pub collation_req_vstaging_receiver: + IncomingRequestReceiver, pub available_data_req_receiver: IncomingRequestReceiver, pub statement_req_receiver: IncomingRequestReceiver, @@ -128,7 +132,8 @@ pub fn prepared_overseer_builder<'a, Spawner, RuntimeClient>( authority_discovery_service, pov_req_receiver, chunk_req_receiver, - collation_req_receiver, + collation_req_v1_receiver, + collation_req_vstaging_receiver, available_data_req_receiver, statement_req_receiver, dispute_req_receiver, @@ -224,12 +229,13 @@ where .collation_generation(CollationGenerationSubsystem::new(Metrics::register(registry)?)) .collator_protocol({ let side = match is_collator { - IsCollator::Yes(collator_pair) => ProtocolSide::Collator( - network_service.local_peer_id().clone(), + IsCollator::Yes(collator_pair) => ProtocolSide::Collator { + peer_id: network_service.local_peer_id().clone(), collator_pair, - collation_req_receiver, - Metrics::register(registry)?, - ), + request_receiver_v1: collation_req_v1_receiver, + request_receiver_vstaging: collation_req_vstaging_receiver, + metrics: 
Metrics::register(registry)?, + }, IsCollator::No => ProtocolSide::Validator { keystore: keystore.clone(), eviction_policy: Default::default(), From 77ba0973ef893c2927bde21b4a16f16ec81ae425 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Mon, 18 Jul 2022 18:52:35 +0300 Subject: [PATCH 10/45] Fix collator side tests --- .../src/collator_side/tests.rs | 95 ++++++++++++++----- node/network/collator-protocol/src/lib.rs | 5 + .../src/validator_side/mod.rs | 1 + 3 files changed, 76 insertions(+), 25 deletions(-) diff --git a/node/network/collator-protocol/src/collator_side/tests.rs b/node/network/collator-protocol/src/collator_side/tests.rs index 0c54353fd06d..1da6966eb266 100644 --- a/node/network/collator-protocol/src/collator_side/tests.rs +++ b/node/network/collator-protocol/src/collator_side/tests.rs @@ -29,7 +29,11 @@ use sp_core::crypto::Pair; use sp_keyring::Sr25519Keyring; use sp_runtime::traits::AppVerify; -use polkadot_node_network_protocol::{our_view, request_response::IncomingRequest, view}; +use polkadot_node_network_protocol::{ + our_view, + request_response::{v1 as request_v1, IncomingRequest}, + view, +}; use polkadot_node_primitives::BlockData; use polkadot_node_subsystem::{ jaeger, @@ -44,6 +48,8 @@ use polkadot_primitives::v2::{ }; use polkadot_primitives_test_helpers::TestCandidateBuilder; +const API_VERSION_PROSPECTIVE_DISABLED: u32 = 2; + #[derive(Clone)] struct TestState { para_id: ParaId, @@ -176,6 +182,17 @@ impl TestState { )), ) .await; + + assert_matches!( + overseer_recv(virtual_overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::Version(tx) + )) => { + assert_eq!(relay_parent, self.relay_parent); + tx.send(Ok(API_VERSION_PROSPECTIVE_DISABLED)).unwrap(); + } + ); } } @@ -183,7 +200,8 @@ type VirtualOverseer = test_helpers::TestSubsystemContextHandle>( @@ -201,14 +219,23 @@ fn test_harness>( let (context, virtual_overseer) = test_helpers::make_subsystem_context(pool.clone()); - let 
(collation_req_receiver, req_cfg) = IncomingRequest::get_config_receiver(); + let (collation_req_receiver, req_v1_cfg) = IncomingRequest::get_config_receiver(); + let (collation_req_vstaging_receiver, req_vstaging_cfg) = + IncomingRequest::get_config_receiver(); let subsystem = async { - run(context, local_peer_id, collator_pair, collation_req_receiver, Default::default()) - .await - .unwrap(); + run( + context, + local_peer_id, + collator_pair, + collation_req_receiver, + collation_req_vstaging_receiver, + Default::default(), + ) + .await + .unwrap(); }; - let test_fut = test(TestHarness { virtual_overseer, req_cfg }); + let test_fut = test(TestHarness { virtual_overseer, req_v1_cfg, req_vstaging_cfg }); futures::pin_mut!(test_fut); futures::pin_mut!(subsystem); @@ -282,6 +309,17 @@ async fn setup_system(virtual_overseer: &mut VirtualOverseer, test_state: &TestS ])), ) .await; + + assert_matches!( + overseer_recv(virtual_overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::Version(tx) + )) => { + assert_eq!(relay_parent, test_state.relay_parent); + tx.send(Ok(API_VERSION_PROSPECTIVE_DISABLED)).unwrap(); + } + ); } /// Result of [`distribute_collation`] @@ -301,6 +339,7 @@ async fn distribute_collation( let pov_block = PoV { block_data: BlockData(vec![42, 43, 44]) }; let pov_hash = pov_block.hash(); + let parent_head_data_hash = Hash::zero(); let candidate = TestCandidateBuilder { para_id: test_state.para_id, @@ -312,7 +351,12 @@ async fn distribute_collation( overseer_send( virtual_overseer, - CollatorProtocolMessage::DistributeCollation(candidate.clone(), pov_block.clone(), None), + CollatorProtocolMessage::DistributeCollation( + candidate.clone(), + parent_head_data_hash, + pov_block.clone(), + None, + ), ) .await; @@ -501,7 +545,8 @@ fn advertise_and_send_collation() { test_harness(local_peer_id, collator_pair, |test_harness| async move { let mut virtual_overseer = test_harness.virtual_overseer; - let mut 
req_cfg = test_harness.req_cfg; + let mut req_v1_cfg = test_harness.req_v1_cfg; + let req_vstaging_cfg = test_harness.req_vstaging_cfg; setup_system(&mut virtual_overseer, &test_state).await; @@ -534,13 +579,13 @@ fn advertise_and_send_collation() { // Request a collation. let (pending_response, rx) = oneshot::channel(); - req_cfg + req_v1_cfg .inbound_queue .as_mut() .unwrap() .send(RawIncomingRequest { peer, - payload: VersionedCollationRequest { + payload: request_v1::CollationFetchingRequest { relay_parent: test_state.relay_parent, para_id: test_state.para_id, } @@ -553,13 +598,13 @@ fn advertise_and_send_collation() { { let (pending_response, rx) = oneshot::channel(); - req_cfg + req_v1_cfg .inbound_queue .as_mut() .unwrap() .send(RawIncomingRequest { peer, - payload: VersionedCollationRequest { + payload: request_v1::CollationFetchingRequest { relay_parent: test_state.relay_parent, para_id: test_state.para_id, } @@ -584,8 +629,8 @@ fn advertise_and_send_collation() { assert_matches!( rx.await, Ok(full_response) => { - let CollationFetchingResponse::Collation(receipt, pov): CollationFetchingResponse - = CollationFetchingResponse::decode( + let request_v1::CollationFetchingResponse::Collation(receipt, pov): request_v1::CollationFetchingResponse + = request_v1::CollationFetchingResponse::decode( &mut full_response.result .expect("We should have a proper answer").as_ref() ) @@ -603,13 +648,13 @@ fn advertise_and_send_collation() { // Re-request a collation. 
let (pending_response, rx) = oneshot::channel(); - req_cfg + req_v1_cfg .inbound_queue .as_mut() .unwrap() .send(RawIncomingRequest { peer, - payload: VersionedCollationRequest { + payload: request_v1::CollationFetchingRequest { relay_parent: old_relay_parent, para_id: test_state.para_id, } @@ -636,7 +681,7 @@ fn advertise_and_send_collation() { .await; expect_advertise_collation_msg(&mut virtual_overseer, &peer, test_state.relay_parent).await; - TestHarness { virtual_overseer, req_cfg } + TestHarness { virtual_overseer, req_v1_cfg, req_vstaging_cfg } }); } @@ -867,7 +912,7 @@ where test_harness(local_peer_id, collator_pair, |mut test_harness| async move { let virtual_overseer = &mut test_harness.virtual_overseer; - let req_cfg = &mut test_harness.req_cfg; + let req_cfg = &mut test_harness.req_v1_cfg; setup_system(virtual_overseer, &test_state).await; @@ -911,7 +956,7 @@ where .unwrap() .send(RawIncomingRequest { peer: validator_0, - payload: VersionedCollationRequest { + payload: request_v1::CollationFetchingRequest { relay_parent: test_state.relay_parent, para_id: test_state.para_id, } @@ -925,8 +970,8 @@ where let feedback_tx = assert_matches!( rx.await, Ok(full_response) => { - let CollationFetchingResponse::Collation(receipt, pov): CollationFetchingResponse - = CollationFetchingResponse::decode( + let request_v1::CollationFetchingResponse::Collation(receipt, pov): request_v1::CollationFetchingResponse + = request_v1::CollationFetchingResponse::decode( &mut full_response.result .expect("We should have a proper answer").as_ref() ) @@ -946,7 +991,7 @@ where .unwrap() .send(RawIncomingRequest { peer: validator_1, - payload: VersionedCollationRequest { + payload: request_v1::CollationFetchingRequest { relay_parent: test_state.relay_parent, para_id: test_state.para_id, } @@ -962,8 +1007,8 @@ where assert_matches!( rx.await, Ok(full_response) => { - let CollationFetchingResponse::Collation(receipt, pov): CollationFetchingResponse - = 
CollationFetchingResponse::decode( + let request_v1::CollationFetchingResponse::Collation(receipt, pov): request_v1::CollationFetchingResponse + = request_v1::CollationFetchingResponse::decode( &mut full_response.result .expect("We should have a proper answer").as_ref() ) diff --git a/node/network/collator-protocol/src/lib.rs b/node/network/collator-protocol/src/lib.rs index d59091be4378..3edadc64823c 100644 --- a/node/network/collator-protocol/src/lib.rs +++ b/node/network/collator-protocol/src/lib.rs @@ -75,11 +75,16 @@ pub enum ProtocolSide { }, /// Collators operate on a parachain. Collator { + /// Local peer id. peer_id: PeerId, + /// Parachain collator pair. collator_pair: CollatorPair, + /// Receiver for v1 collation fetching requests. request_receiver_v1: IncomingRequestReceiver, + /// Receiver for vstaging collation fetching requests. request_receiver_vstaging: IncomingRequestReceiver, + /// Metrics. metrics: collator_side::Metrics, }, } diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index 0e6816d7bfa9..acc6cfbe4743 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -1084,6 +1084,7 @@ async fn handle_network_msg( PeerMessage(remote, Versioned::V1(msg)) => { process_incoming_peer_message(ctx, state, remote, msg).await; }, + PeerMessage(_, Versioned::VStaging(_)) => todo!(), } Ok(()) From 0288476b2c5d25028de9441219051a5cdd6d29ec Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Sun, 31 Jul 2022 18:57:30 +0300 Subject: [PATCH 11/45] Extract relay parent mode to lib --- .../src/collator_side/mod.rs | 53 +----------------- node/network/collator-protocol/src/lib.rs | 56 ++++++++++++++++++- 2 files changed, 55 insertions(+), 54 deletions(-) diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index 9d6f5f819cf3..fa5e193fd1ad 
100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -38,7 +38,6 @@ use polkadot_node_subsystem::{ jaeger, messages::{ CollatorProtocolMessage, NetworkBridgeEvent, NetworkBridgeMessage, RuntimeApiMessage, - RuntimeApiRequest, }, overseer, CollatorProtocolSenderTrait, FromOrchestra, OverseerSignal, PerLeafSpan, }; @@ -52,7 +51,7 @@ use polkadot_primitives::v2::{ Hash, Id as ParaId, }; -use super::LOG_TARGET; +use super::{prospective_parachains_mode, ProspectiveParachainsMode, LOG_TARGET}; use crate::error::{log_error, Error, FatalError, Result}; mod collation; @@ -161,56 +160,6 @@ impl ValidatorGroup { } } -#[derive(Debug, Clone, Copy, PartialEq)] -enum ProspectiveParachainsMode { - // v2 runtime API: no prospective parachains. - Disabled, - // vstaging runtime API: prospective parachains. - Enabled, -} - -impl ProspectiveParachainsMode { - fn is_enabled(&self) -> bool { - matches!(self, Self::Enabled) - } -} - -async fn prospective_parachains_mode( - sender: &mut Sender, - leaf_hash: Hash, -) -> Result -where - Sender: CollatorProtocolSenderTrait, -{ - // TODO: call a Runtime API once staging version is available - // https://github.com/paritytech/substrate/discussions/11338 - // - // Implementation should be shared with backing & provisioner. - - let (tx, rx) = oneshot::channel(); - sender - .send_message(RuntimeApiMessage::Request(leaf_hash, RuntimeApiRequest::Version(tx))) - .await; - - let version = rx - .await - .map_err(Error::CancelledRuntimeApiVersion)? - .map_err(Error::RuntimeApi)?; - - if version == 3 { - Ok(ProspectiveParachainsMode::Enabled) - } else { - if version != 2 { - gum::warn!( - target: LOG_TARGET, - "Runtime API version is {}, expected 2 or 3. 
Prospective parachains are disabled", - version - ); - } - Ok(ProspectiveParachainsMode::Disabled) - } -} - struct PerRelayParent { prospective_parachains_mode: ProspectiveParachainsMode, /// Validators group responsible for backing candidates built diff --git a/node/network/collator-protocol/src/lib.rs b/node/network/collator-protocol/src/lib.rs index 3edadc64823c..febad0515d60 100644 --- a/node/network/collator-protocol/src/lib.rs +++ b/node/network/collator-protocol/src/lib.rs @@ -31,10 +31,12 @@ use polkadot_node_network_protocol::{ request_response::{v1 as request_v1, vstaging as protocol_vstaging, IncomingRequestReceiver}, PeerId, UnifiedReputationChange as Rep, }; -use polkadot_primitives::v2::CollatorPair; +use polkadot_primitives::v2::{CollatorPair, Hash}; use polkadot_node_subsystem::{ - errors::SubsystemError, messages::NetworkBridgeMessage, overseer, SpawnedSubsystem, + errors::SubsystemError, + messages::{NetworkBridgeMessage, RuntimeApiMessage, RuntimeApiRequest}, + overseer, SpawnedSubsystem, }; mod error; @@ -155,3 +157,53 @@ async fn modify_reputation( sender.send_message(NetworkBridgeMessage::ReportPeer(peer, rep)).await; } + +#[derive(Debug, Clone, Copy, PartialEq)] +enum ProspectiveParachainsMode { + // v2 runtime API: no prospective parachains. + Disabled, + // vstaging runtime API: prospective parachains. + Enabled, +} + +impl ProspectiveParachainsMode { + fn is_enabled(&self) -> bool { + matches!(self, Self::Enabled) + } +} + +async fn prospective_parachains_mode( + sender: &mut Sender, + leaf_hash: Hash, +) -> Result +where + Sender: polkadot_node_subsystem::CollatorProtocolSenderTrait, +{ + // TODO: call a Runtime API once staging version is available + // https://github.com/paritytech/substrate/discussions/11338 + // + // Implementation should be shared with backing & provisioner. 
+ + let (tx, rx) = futures::channel::oneshot::channel(); + sender + .send_message(RuntimeApiMessage::Request(leaf_hash, RuntimeApiRequest::Version(tx))) + .await; + + let version = rx + .await + .map_err(error::Error::CancelledRuntimeApiVersion)? + .map_err(error::Error::RuntimeApi)?; + + if version == 3 { + Ok(ProspectiveParachainsMode::Enabled) + } else { + if version != 2 { + gum::warn!( + target: LOG_TARGET, + "Runtime API version is {}, expected 2 or 3. Prospective parachains are disabled", + version + ); + } + Ok(ProspectiveParachainsMode::Disabled) + } +} From 19a47b096bfee1342cb81e97968e2cc7f2a2fcea Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Thu, 4 Aug 2022 21:30:26 +0300 Subject: [PATCH 12/45] Validator side draft --- .../src/collator_side/mod.rs | 18 +- node/network/collator-protocol/src/lib.rs | 9 + .../src/validator_side/metrics.rs | 123 ++ .../src/validator_side/mod.rs | 1247 +++++++++++------ 4 files changed, 927 insertions(+), 470 deletions(-) create mode 100644 node/network/collator-protocol/src/validator_side/metrics.rs diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index fa5e193fd1ad..0eb225a493c6 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -51,7 +51,9 @@ use polkadot_primitives::v2::{ Hash, Id as ParaId, }; -use super::{prospective_parachains_mode, ProspectiveParachainsMode, LOG_TARGET}; +use super::{ + prospective_parachains_mode, ProspectiveParachainsMode, LOG_TARGET, MAX_CANDIDATE_DEPTH, +}; use crate::error::{log_error, Error, FatalError, Result}; mod collation; @@ -79,15 +81,6 @@ const COST_APPARENT_FLOOD: Rep = /// For considerations on this value, see: https://github.com/paritytech/polkadot/issues/4386 const MAX_UNSHARED_UPLOAD_TIME: Duration = Duration::from_millis(150); -/// The maximum depth a candidate can occupy for any relay parent. 
-/// 'depth' is defined as the amount of blocks between the para -/// head in a relay-chain block's state and a candidate with a -/// particular relay-parent. -/// -/// This value is only used for limiting the number of candidates -/// we accept and distribute per relay parent. -const MAX_CANDIDATE_DEPTH: usize = 4; - /// Info about validators we are currently connected to. /// /// It keeps track to which validators we advertised our collation. @@ -1063,6 +1056,7 @@ where } for (leaf, mode) in removed { + state.active_leaves.remove(leaf); // If the leaf is deactivated it still may stay in the view as a part // of implicit ancestry. Only update the state after the hash is actually // pruned from the block info storage. @@ -1189,7 +1183,7 @@ pub(crate) async fn run( log_error( handle_incoming_request(&mut ctx, &mut state, request).await, - "Handling incoming request" + "Handling incoming collation fetch request V1" )?; } in_req = recv_req_v2 => { @@ -1197,7 +1191,7 @@ pub(crate) async fn run( log_error( handle_incoming_request(&mut ctx, &mut state, request).await, - "Handling incoming request" + "Handling incoming collation fetch request VStaging" )?; } } diff --git a/node/network/collator-protocol/src/lib.rs b/node/network/collator-protocol/src/lib.rs index febad0515d60..c839509509ca 100644 --- a/node/network/collator-protocol/src/lib.rs +++ b/node/network/collator-protocol/src/lib.rs @@ -46,6 +46,15 @@ mod validator_side; const LOG_TARGET: &'static str = "parachain::collator-protocol"; +/// The maximum depth a candidate can occupy for any relay parent. +/// 'depth' is defined as the amount of blocks between the para +/// head in a relay-chain block's state and a candidate with a +/// particular relay-parent. +/// +/// This value is only used for limiting the number of candidates +/// we accept and distribute per relay parent. +const MAX_CANDIDATE_DEPTH: usize = 4; + /// A collator eviction policy - how fast to evict collators which are inactive. 
#[derive(Debug, Clone, Copy)] pub struct CollatorEvictionPolicy { diff --git a/node/network/collator-protocol/src/validator_side/metrics.rs b/node/network/collator-protocol/src/validator_side/metrics.rs new file mode 100644 index 000000000000..a011a5f3b43e --- /dev/null +++ b/node/network/collator-protocol/src/validator_side/metrics.rs @@ -0,0 +1,123 @@ +// Copyright 2017-2022 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use polkadot_node_subsystem_util::metrics::{self, prometheus}; + +#[derive(Clone, Default)] +pub struct Metrics(Option); + +impl Metrics { + pub fn on_request(&self, succeeded: std::result::Result<(), ()>) { + if let Some(metrics) = &self.0 { + match succeeded { + Ok(()) => metrics.collation_requests.with_label_values(&["succeeded"]).inc(), + Err(()) => metrics.collation_requests.with_label_values(&["failed"]).inc(), + } + } + } + + /// Provide a timer for `process_msg` which observes on drop. + pub fn time_process_msg(&self) -> Option { + self.0.as_ref().map(|metrics| metrics.process_msg.start_timer()) + } + + /// Provide a timer for `handle_collation_request_result` which observes on drop. + pub fn time_handle_collation_request_result( + &self, + ) -> Option { + self.0 + .as_ref() + .map(|metrics| metrics.handle_collation_request_result.start_timer()) + } + + /// Note the current number of collator peers. 
+ pub fn note_collator_peer_count(&self, collator_peers: usize) { + self.0 + .as_ref() + .map(|metrics| metrics.collator_peer_count.set(collator_peers as u64)); + } + + /// Provide a timer for `PerRequest` structure which observes on drop. + pub fn time_collation_request_duration( + &self, + ) -> Option { + self.0.as_ref().map(|metrics| metrics.collation_request_duration.start_timer()) + } +} + +#[derive(Clone)] +struct MetricsInner { + collation_requests: prometheus::CounterVec, + process_msg: prometheus::Histogram, + handle_collation_request_result: prometheus::Histogram, + collator_peer_count: prometheus::Gauge, + collation_request_duration: prometheus::Histogram, +} + +impl metrics::Metrics for Metrics { + fn try_register( + registry: &prometheus::Registry, + ) -> std::result::Result { + let metrics = MetricsInner { + collation_requests: prometheus::register( + prometheus::CounterVec::new( + prometheus::Opts::new( + "polkadot_parachain_collation_requests_total", + "Number of collations requested from Collators.", + ), + &["success"], + )?, + registry, + )?, + process_msg: prometheus::register( + prometheus::Histogram::with_opts( + prometheus::HistogramOpts::new( + "polkadot_parachain_collator_protocol_validator_process_msg", + "Time spent within `collator_protocol_validator::process_msg`", + ) + )?, + registry, + )?, + handle_collation_request_result: prometheus::register( + prometheus::Histogram::with_opts( + prometheus::HistogramOpts::new( + "polkadot_parachain_collator_protocol_validator_handle_collation_request_result", + "Time spent within `collator_protocol_validator::handle_collation_request_result`", + ) + )?, + registry, + )?, + collator_peer_count: prometheus::register( + prometheus::Gauge::new( + "polkadot_parachain_collator_peer_count", + "Amount of collator peers connected", + )?, + registry, + )?, + collation_request_duration: prometheus::register( + prometheus::Histogram::with_opts( + prometheus::HistogramOpts::new( + 
"polkadot_parachain_collator_protocol_validator_collation_request_duration", + "Lifetime of the `PerRequest` structure", + ).buckets(vec![0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.75, 0.9, 1.0, 1.2, 1.5, 1.75]), + )?, + registry, + )?, + }; + + Ok(Metrics(Some(metrics))) + } +} diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index acc6cfbe4743..2c15fb641108 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -24,8 +24,7 @@ use futures::{ }; use futures_timer::Delay; use std::{ - collections::{hash_map::Entry, HashMap, HashSet}, - sync::Arc, + collections::{hash_map::Entry, HashMap, HashSet, VecDeque}, task::Poll, time::{Duration, Instant}, }; @@ -38,33 +37,42 @@ use polkadot_node_network_protocol::{ request_response as req_res, request_response::{ outgoing::{Recipient, RequestError}, - v1::{CollationFetchingRequest, CollationFetchingResponse}, - OutgoingRequest, Requests, + v1 as request_v1, vstaging as request_vstaging, OutgoingRequest, Requests, }, - v1 as protocol_v1, OurView, PeerId, UnifiedReputationChange as Rep, Versioned, View, + v1 as protocol_v1, vstaging as protocol_vstaging, OurView, PeerId, + UnifiedReputationChange as Rep, Versioned, View, }; -use polkadot_node_primitives::{PoV, SignedFullStatement}; +use polkadot_node_primitives::{PoV, SignedFullStatement, Statement}; use polkadot_node_subsystem::{ jaeger, messages::{ CandidateBackingMessage, CollatorProtocolMessage, IfDisconnected, NetworkBridgeEvent, - NetworkBridgeMessage, RuntimeApiMessage, + NetworkBridgeMessage, }, - overseer, FromOrchestra, OverseerSignal, PerLeafSpan, SubsystemSender, + overseer, CollatorProtocolSenderTrait, FromOrchestra, OverseerSignal, PerLeafSpan, +}; +use polkadot_node_subsystem_util::{ + backing_implicit_view::View as ImplicitView, metrics::prometheus::prometheus::HistogramTimer, }; -use 
polkadot_node_subsystem_util::metrics::{self, prometheus}; use polkadot_primitives::v2::{ - CandidateReceipt, CollatorId, Hash, Id as ParaId, OccupiedCoreAssumption, + CandidateHash, CandidateReceipt, CollatorId, Hash, Id as ParaId, OccupiedCoreAssumption, PersistedValidationData, }; -use crate::error::Result; +use crate::error::{Error, Result}; + +use super::{ + modify_reputation, prospective_parachains_mode, ProspectiveParachainsMode, LOG_TARGET, + MAX_CANDIDATE_DEPTH, +}; -use super::{modify_reputation, LOG_TARGET}; +mod metrics; #[cfg(test)] mod tests; +pub use metrics::Metrics; + const COST_UNEXPECTED_MESSAGE: Rep = Rep::CostMinor("An unexpected message"); /// Message could not be decoded properly. const COST_CORRUPTED_MESSAGE: Rep = Rep::CostMinor("Message was corrupt"); @@ -102,121 +110,16 @@ const ACTIVITY_POLL: Duration = Duration::from_millis(10); // See https://github.com/paritytech/polkadot/issues/4182 const CHECK_COLLATIONS_POLL: Duration = Duration::from_millis(5); -#[derive(Clone, Default)] -pub struct Metrics(Option); - -impl Metrics { - fn on_request(&self, succeeded: std::result::Result<(), ()>) { - if let Some(metrics) = &self.0 { - match succeeded { - Ok(()) => metrics.collation_requests.with_label_values(&["succeeded"]).inc(), - Err(()) => metrics.collation_requests.with_label_values(&["failed"]).inc(), - } - } - } - - /// Provide a timer for `process_msg` which observes on drop. - fn time_process_msg(&self) -> Option { - self.0.as_ref().map(|metrics| metrics.process_msg.start_timer()) - } - - /// Provide a timer for `handle_collation_request_result` which observes on drop. - fn time_handle_collation_request_result( - &self, - ) -> Option { - self.0 - .as_ref() - .map(|metrics| metrics.handle_collation_request_result.start_timer()) - } - - /// Note the current number of collator peers. 
- fn note_collator_peer_count(&self, collator_peers: usize) { - self.0 - .as_ref() - .map(|metrics| metrics.collator_peer_count.set(collator_peers as u64)); - } - - /// Provide a timer for `PerRequest` structure which observes on drop. - fn time_collation_request_duration( - &self, - ) -> Option { - self.0.as_ref().map(|metrics| metrics.collation_request_duration.start_timer()) - } -} - -#[derive(Clone)] -struct MetricsInner { - collation_requests: prometheus::CounterVec, - process_msg: prometheus::Histogram, - handle_collation_request_result: prometheus::Histogram, - collator_peer_count: prometheus::Gauge, - collation_request_duration: prometheus::Histogram, -} - -impl metrics::Metrics for Metrics { - fn try_register( - registry: &prometheus::Registry, - ) -> std::result::Result { - let metrics = MetricsInner { - collation_requests: prometheus::register( - prometheus::CounterVec::new( - prometheus::Opts::new( - "polkadot_parachain_collation_requests_total", - "Number of collations requested from Collators.", - ), - &["success"], - )?, - registry, - )?, - process_msg: prometheus::register( - prometheus::Histogram::with_opts( - prometheus::HistogramOpts::new( - "polkadot_parachain_collator_protocol_validator_process_msg", - "Time spent within `collator_protocol_validator::process_msg`", - ) - )?, - registry, - )?, - handle_collation_request_result: prometheus::register( - prometheus::Histogram::with_opts( - prometheus::HistogramOpts::new( - "polkadot_parachain_collator_protocol_validator_handle_collation_request_result", - "Time spent within `collator_protocol_validator::handle_collation_request_result`", - ) - )?, - registry, - )?, - collator_peer_count: prometheus::register( - prometheus::Gauge::new( - "polkadot_parachain_collator_peer_count", - "Amount of collator peers connected", - )?, - registry, - )?, - collation_request_duration: prometheus::register( - prometheus::Histogram::with_opts( - prometheus::HistogramOpts::new( - 
"polkadot_parachain_collator_protocol_validator_collation_request_duration", - "Lifetime of the `PerRequest` structure", - ).buckets(vec![0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.75, 0.9, 1.0, 1.2, 1.5, 1.75]), - )?, - registry, - )?, - }; - - Ok(Metrics(Some(metrics))) - } -} - struct PerRequest { /// Responses from collator. - from_collator: Fuse>>, + from_collator: + Fuse>>, /// Sender to forward to initial requester. to_requester: oneshot::Sender<(CandidateReceipt, PoV)>, /// A jaeger span corresponding to the lifetime of the request. span: Option, /// A metric histogram for the lifetime of the request - _lifetime_timer: Option, + _lifetime_timer: Option, } #[derive(Debug)] @@ -224,7 +127,7 @@ struct CollatingPeerState { collator_id: CollatorId, para_id: ParaId, // Advertised relay parents. - advertisements: HashSet, + advertisements: HashMap>, last_active: Instant, } @@ -238,9 +141,17 @@ enum PeerState { #[derive(Debug)] enum AdvertisementError { + /// Advertisement is already known. Duplicate, + /// Collation relay parent is out of our view. OutOfOurView, + /// No prior declare message received. UndeclaredCollator, + /// A limit for announcements per peer is reached. + LimitReached, + /// Mismatch of relay parent mode and advertisement arguments. + /// An internal error that should not happen. + InvalidArguments, } #[derive(Debug)] @@ -256,19 +167,61 @@ impl PeerData { /// Update the view, clearing all advertisements that are no longer in the /// current view. 
- fn update_view(&mut self, new_view: View) { + fn update_view( + &mut self, + implicit_view: &ImplicitView, + active_leaves: &HashMap, + per_relay_parent: &HashMap, + new_view: View, + ) { let old_view = std::mem::replace(&mut self.view, new_view); if let PeerState::Collating(ref mut peer_state) = self.state { for removed in old_view.difference(&self.view) { - let _ = peer_state.advertisements.remove(&removed); + // Only keep advertisements if prospective parachains + // are enabled and the relay parent is a part of allowed + // ancestry. + let relay_parent_mode_enabled = per_relay_parent + .get(removed) + .map_or(false, |s| s.prospective_parachains_mode.is_enabled()); + let keep = relay_parent_mode_enabled && + is_relay_parent_in_view( + removed, + ProspectiveParachainsMode::Enabled, + implicit_view, + active_leaves, + peer_state.para_id, + ); + + if !keep { + peer_state.advertisements.remove(&removed); + } } } } /// Prune old advertisements relative to our view. - fn prune_old_advertisements(&mut self, our_view: &View) { + fn prune_old_advertisements( + &mut self, + implicit_view: &ImplicitView, + active_leaves: &HashMap, + per_relay_parent: &HashMap, + ) { if let PeerState::Collating(ref mut peer_state) = self.state { - peer_state.advertisements.retain(|a| our_view.contains(a)); + peer_state.advertisements.retain(|hash, _| { + // Either + // - Relay parent is an active leaf + // - It belongs to allowed ancestry under some leaf + // Discard otherwise. 
+ per_relay_parent.get(hash).map_or(false, |s| { + is_relay_parent_in_view( + hash, + s.prospective_parachains_mode, + implicit_view, + active_leaves, + peer_state.para_id, + ) + }) + }); } } @@ -278,18 +231,52 @@ impl PeerData { fn insert_advertisement( &mut self, on_relay_parent: Hash, - our_view: &View, + relay_parent_mode: ProspectiveParachainsMode, + candidate_hash: Option, + implicit_view: &ImplicitView, + active_leaves: &HashMap, ) -> std::result::Result<(CollatorId, ParaId), AdvertisementError> { match self.state { PeerState::Connected(_) => Err(AdvertisementError::UndeclaredCollator), - _ if !our_view.contains(&on_relay_parent) => Err(AdvertisementError::OutOfOurView), - PeerState::Collating(ref mut state) => - if state.advertisements.insert(on_relay_parent) { - state.last_active = Instant::now(); - Ok((state.collator_id.clone(), state.para_id.clone())) - } else { - Err(AdvertisementError::Duplicate) - }, + PeerState::Collating(ref mut state) => { + if !is_relay_parent_in_view( + &on_relay_parent, + relay_parent_mode, + implicit_view, + active_leaves, + state.para_id, + ) { + return Err(AdvertisementError::OutOfOurView) + } + + match (relay_parent_mode, candidate_hash) { + (ProspectiveParachainsMode::Disabled, None) => { + if state.advertisements.contains_key(&on_relay_parent) { + return Err(AdvertisementError::Duplicate) + } + state.advertisements.insert(on_relay_parent, HashSet::new()); + }, + (ProspectiveParachainsMode::Enabled, Some(candidate_hash)) => { + if state + .advertisements + .get(&on_relay_parent) + .map_or(false, |candidates| candidates.contains(&candidate_hash)) + { + return Err(AdvertisementError::Duplicate) + } + let candidates = state.advertisements.entry(on_relay_parent).or_default(); + + if candidates.len() >= MAX_CANDIDATE_DEPTH + 1 { + return Err(AdvertisementError::LimitReached) + } + candidates.insert(candidate_hash); + }, + _ => return Err(AdvertisementError::InvalidArguments), + } + + state.last_active = Instant::now(); + 
Ok((state.collator_id, state.para_id)) + }, } } @@ -309,7 +296,7 @@ impl PeerData { self.state = PeerState::Collating(CollatingPeerState { collator_id, para_id, - advertisements: HashSet::new(), + advertisements: HashMap::new(), last_active: Instant::now(), }); } @@ -329,10 +316,23 @@ impl PeerData { } /// Whether the peer has advertised the given collation. - fn has_advertised(&self, relay_parent: &Hash) -> bool { - match self.state { - PeerState::Connected(_) => false, - PeerState::Collating(ref state) => state.advertisements.contains(relay_parent), + fn has_advertised( + &self, + relay_parent: &Hash, + maybe_candidate_hash: Option, + ) -> bool { + let collating_state = match self.state { + PeerState::Connected(_) => return false, + PeerState::Collating(ref state) => state, + }; + + if let Some(ref candidate_hash) = maybe_candidate_hash { + collating_state + .advertisements + .get(relay_parent) + .map_or(false, |candidates| candidates.contains(candidate_hash)) + } else { + collating_state.advertisements.contains_key(relay_parent) } } @@ -352,127 +352,25 @@ impl Default for PeerData { } } -struct GroupAssignments { - current: Option, -} - -#[derive(Default)] -struct ActiveParas { - relay_parent_assignments: HashMap, - current_assignments: HashMap, +/// Identifier of a fetched collation. 
+#[derive(Debug, Clone, Hash, Eq, PartialEq)] +struct FetchedCollation { + relay_parent: Hash, + para_id: ParaId, + candidate_hash: CandidateHash, + collator_id: CollatorId, } -impl ActiveParas { - async fn assign_incoming( - &mut self, - sender: &mut impl SubsystemSender, - keystore: &SyncCryptoStorePtr, - new_relay_parents: impl IntoIterator, - ) { - for relay_parent in new_relay_parents { - let mv = polkadot_node_subsystem_util::request_validators(relay_parent, sender) - .await - .await - .ok() - .map(|x| x.ok()) - .flatten(); - - let mg = polkadot_node_subsystem_util::request_validator_groups(relay_parent, sender) - .await - .await - .ok() - .map(|x| x.ok()) - .flatten(); - - let mc = polkadot_node_subsystem_util::request_availability_cores(relay_parent, sender) - .await - .await - .ok() - .map(|x| x.ok()) - .flatten(); - - let (validators, groups, rotation_info, cores) = match (mv, mg, mc) { - (Some(v), Some((g, r)), Some(c)) => (v, g, r, c), - _ => { - gum::debug!( - target: LOG_TARGET, - ?relay_parent, - "Failed to query runtime API for relay-parent", - ); - - continue - }, - }; - - let para_now = - match polkadot_node_subsystem_util::signing_key_and_index(&validators, keystore) - .await - .and_then(|(_, index)| { - polkadot_node_subsystem_util::find_validator_group(&groups, index) - }) { - Some(group) => { - let core_now = rotation_info.core_for_group(group, cores.len()); - - cores.get(core_now.0 as usize).and_then(|c| c.para_id()) - }, - None => { - gum::trace!(target: LOG_TARGET, ?relay_parent, "Not a validator"); - - continue - }, - }; - - // This code won't work well, if at all for parathreads. For parathreads we'll - // have to be aware of which core the parathread claim is going to be multiplexed - // onto. The parathread claim will also have a known collator, and we should always - // allow an incoming connection from that collator. If not even connecting to them - // directly. 
- // - // However, this'll work fine for parachains, as each parachain gets a dedicated - // core. - if let Some(para_now) = para_now { - let entry = self.current_assignments.entry(para_now).or_default(); - *entry += 1; - if *entry == 1 { - gum::debug!( - target: LOG_TARGET, - ?relay_parent, - para_id = ?para_now, - "Assigned to a parachain", - ); - } - } - - self.relay_parent_assignments - .insert(relay_parent, GroupAssignments { current: para_now }); - } - } - - fn remove_outgoing(&mut self, old_relay_parents: impl IntoIterator) { - for old_relay_parent in old_relay_parents { - if let Some(assignments) = self.relay_parent_assignments.remove(&old_relay_parent) { - let GroupAssignments { current } = assignments; - - if let Some(cur) = current { - if let Entry::Occupied(mut occupied) = self.current_assignments.entry(cur) { - *occupied.get_mut() -= 1; - if *occupied.get() == 0 { - occupied.remove_entry(); - gum::debug!( - target: LOG_TARGET, - para_id = ?cur, - "Unassigned from a parachain", - ); - } - } - } - } +impl From<&CandidateReceipt> for FetchedCollation { + fn from(receipt: &CandidateReceipt) -> Self { + let descriptor = receipt.descriptor(); + Self { + relay_parent: descriptor.relay_parent, + para_id: descriptor.para_id, + candidate_hash: receipt.hash(), + collator_id: descriptor.collator.clone(), } } - - fn is_current(&self, id: &ParaId) -> bool { - self.current_assignments.contains_key(id) - } } #[derive(Debug, Clone, Hash, Eq, PartialEq)] @@ -480,15 +378,22 @@ struct PendingCollation { relay_parent: Hash, para_id: ParaId, peer_id: PeerId, + candidate_hash: Option, commitments_hash: Option, } impl PendingCollation { - fn new(relay_parent: Hash, para_id: &ParaId, peer_id: &PeerId) -> Self { + fn new( + relay_parent: Hash, + para_id: &ParaId, + peer_id: &PeerId, + candidate_hash: Option, + ) -> Self { Self { relay_parent, para_id: para_id.clone(), peer_id: peer_id.clone(), + candidate_hash, commitments_hash: None, } } @@ -520,9 +425,14 @@ impl Default for 
CollationStatus { impl CollationStatus { /// Downgrades to `Waiting`, but only if `self != Seconded`. - fn back_to_waiting(&mut self) { + fn back_to_waiting(&mut self, relay_parent_mode: ProspectiveParachainsMode) { match self { - Self::Seconded => {}, + Self::Seconded => + if relay_parent_mode.is_enabled() { + // With async backing enabled it's allowed to + // second more candidates. + *self = Self::Waiting + }, _ => *self = Self::Waiting, } } @@ -530,68 +440,128 @@ impl CollationStatus { /// Information about collations per relay parent. #[derive(Default)] -struct CollationsPerRelayParent { +struct Collations { /// What is the current status in regards to a collation for this relay parent? status: CollationStatus, - /// Collation currently being fetched. + /// Collator we're fetching from. /// /// This is the currently last started fetch, which did not exceed `MAX_UNSHARED_DOWNLOAD_TIME` /// yet. - waiting_collation: Option, + fetching_from: Option, /// Collation that were advertised to us, but we did not yet fetch. - unfetched_collations: Vec<(PendingCollation, CollatorId)>, + waiting_queue: VecDeque<(PendingCollation, CollatorId)>, + /// How many collations have been seconded per parachain. + /// Only used when async backing is enabled. + seconded_count: HashMap, } -impl CollationsPerRelayParent { +impl Collations { /// Returns the next collation to fetch from the `unfetched_collations`. /// /// This will reset the status back to `Waiting` using [`CollationStatus::back_to_waiting`]. /// /// Returns `Some(_)` if there is any collation to fetch, the `status` is not `Seconded` and /// the passed in `finished_one` is the currently `waiting_collation`. 
- pub fn get_next_collation_to_fetch( + fn get_next_collation_to_fetch( &mut self, finished_one: Option<&CollatorId>, + relay_parent_mode: ProspectiveParachainsMode, ) -> Option<(PendingCollation, CollatorId)> { // If finished one does not match waiting_collation, then we already dequeued another fetch // to replace it. - if self.waiting_collation.as_ref() != finished_one { + if self.fetching_from.as_ref() != finished_one { gum::trace!( target: LOG_TARGET, - waiting_collation = ?self.waiting_collation, + waiting_collation = ?self.fetching_from, ?finished_one, "Not proceeding to the next collation - has already been done." ); return None } - self.status.back_to_waiting(); + self.status.back_to_waiting(relay_parent_mode); match self.status { // We don't need to fetch any other collation when we already have seconded one. CollationStatus::Seconded => None, CollationStatus::Waiting => { - let next = self.unfetched_collations.pop(); - self.waiting_collation = next.as_ref().map(|(_, collator_id)| collator_id.clone()); - next + while let Some(next) = self.waiting_queue.pop_front() { + let para_id = next.0.para_id; + if !self.is_fetch_allowed(relay_parent_mode, para_id) { + continue + } + + return Some(next) + } + + None }, CollationStatus::WaitingOnValidation | CollationStatus::Fetching => unreachable!("We have reset the status above!"), } } + + /// Checks the limit of seconded candidates for a given para. 
+ fn is_fetch_allowed( + &self, + relay_parent_mode: ProspectiveParachainsMode, + para_id: ParaId, + ) -> bool { + let seconded_limit = + if relay_parent_mode.is_enabled() { MAX_CANDIDATE_DEPTH + 1 } else { 1 }; + self.seconded_count.get(¶_id).map_or(true, |&num| num < seconded_limit) + } +} + +#[derive(Debug, Copy, Clone)] +struct GroupAssignments { + current: Option, +} + +struct PerRelayParent { + prospective_parachains_mode: ProspectiveParachainsMode, + assignment: GroupAssignments, + collations: Collations, +} + +impl PerRelayParent { + fn new(mode: ProspectiveParachainsMode) -> Self { + Self { + prospective_parachains_mode: mode, + assignment: GroupAssignments { current: None }, + collations: Collations::default(), + } + } } /// All state relevant for the validator side of the protocol lives here. #[derive(Default)] struct State { - /// Our own view. - view: OurView, + /// Leaves that do support asynchronous backing along with + /// implicit ancestry. Leaves from the implicit view are present in + /// `active_leaves`, the opposite doesn't hold true. + /// + /// Relay-chain blocks which don't support prospective parachains are + /// never included in the fragment trees of active leaves which do. In + /// particular, this means that if a given relay parent belongs to implicit + /// ancestry of some active leaf, then it does support prospective parachains. + implicit_view: ImplicitView, + + /// All active leaves observed by us, including both that do and do not + /// support prospective parachains. This mapping works as a replacement for + /// [`polkadot_node_network_protocol::View`] and can be dropped once the transition + /// to asynchronous backing is done. + active_leaves: HashMap, - /// Active paras based on our view. We only accept collators from these paras. - active_paras: ActiveParas, + /// State tracked + per_relay_parent: HashMap, /// Track all active collators and their data. peer_data: HashMap, + /// Parachains we're currently assigned to. 
With async backing enabled + /// this includes assignments from the implicit view. + current_assignments: HashMap, + /// The collations we have requested by relay parent and para id. /// /// For each relay parent and para id we may be connected to a number @@ -615,11 +585,131 @@ struct State { /// another collator the chance to be faster (dequeue next fetch request as well). collation_fetch_timeouts: FuturesUnordered>, - /// Information about the collations per relay parent. - collations_per_relay_parent: HashMap, - /// Keep track of all pending candidate collations - pending_candidates: HashMap, + fetched_candidates: HashMap, +} + +fn is_relay_parent_in_view( + relay_parent: &Hash, + relay_parent_mode: ProspectiveParachainsMode, + implicit_view: &ImplicitView, + active_leaves: &HashMap, + para_id: ParaId, +) -> bool { + match relay_parent_mode { + ProspectiveParachainsMode::Disabled => true, + ProspectiveParachainsMode::Enabled => active_leaves.iter().any(|(hash, mode)| { + mode.is_enabled() && + implicit_view + .known_allowed_relay_parents_under(hash, Some(para_id)) + .unwrap_or_default() + .contains(relay_parent) + }), + } +} + +async fn assign_incoming( + sender: &mut Sender, + group_assignment: &mut GroupAssignments, + current_assignments: &mut HashMap, + keystore: &SyncCryptoStorePtr, + relay_parent: Hash, +) where + Sender: CollatorProtocolSenderTrait, +{ + let mv = polkadot_node_subsystem_util::request_validators(relay_parent, sender) + .await + .await + .ok() + .map(|x| x.ok()) + .flatten(); + + let mg = polkadot_node_subsystem_util::request_validator_groups(relay_parent, sender) + .await + .await + .ok() + .map(|x| x.ok()) + .flatten(); + + let mc = polkadot_node_subsystem_util::request_availability_cores(relay_parent, sender) + .await + .await + .ok() + .map(|x| x.ok()) + .flatten(); + + let (validators, groups, rotation_info, cores) = match (mv, mg, mc) { + (Some(v), Some((g, r)), Some(c)) => (v, g, r, c), + _ => { + gum::debug!( + target: LOG_TARGET, + 
?relay_parent, + "Failed to query runtime API for relay-parent", + ); + + return + }, + }; + + let para_now = match polkadot_node_subsystem_util::signing_key_and_index(&validators, keystore) + .await + .and_then(|(_, index)| polkadot_node_subsystem_util::find_validator_group(&groups, index)) + { + Some(group) => { + let core_now = rotation_info.core_for_group(group, cores.len()); + + cores.get(core_now.0 as usize).and_then(|c| c.para_id()) + }, + None => { + gum::trace!(target: LOG_TARGET, ?relay_parent, "Not a validator"); + + return + }, + }; + + // This code won't work well, if at all for parathreads. For parathreads we'll + // have to be aware of which core the parathread claim is going to be multiplexed + // onto. The parathread claim will also have a known collator, and we should always + // allow an incoming connection from that collator. If not even connecting to them + // directly. + // + // However, this'll work fine for parachains, as each parachain gets a dedicated + // core. + if let Some(para_now) = para_now { + let entry = current_assignments.entry(para_now).or_default(); + *entry += 1; + if *entry == 1 { + gum::debug!( + target: LOG_TARGET, + ?relay_parent, + para_id = ?para_now, + "Assigned to a parachain", + ); + } + } + + *group_assignment = GroupAssignments { current: para_now }; +} + +fn remove_outgoing( + current_assignments: &mut HashMap, + per_relay_parent: PerRelayParent, +) { + let GroupAssignments { current } = per_relay_parent.assignment; + + if let Some(cur) = current { + if let Entry::Occupied(mut occupied) = current_assignments.entry(cur) { + *occupied.get_mut() -= 1; + if *occupied.get() == 0 { + occupied.remove_entry(); + gum::debug!( + target: LOG_TARGET, + para_id = ?cur, + "Unassigned from a parachain", + ); + } + } + } } // O(n) search for collator ID by iterating through the peers map. 
This should be fast enough @@ -648,19 +738,32 @@ async fn fetch_collation( ) { let (tx, rx) = oneshot::channel(); - let PendingCollation { relay_parent, para_id, peer_id, .. } = pc; - - let timeout = |collator_id, relay_parent| async move { - Delay::new(MAX_UNSHARED_DOWNLOAD_TIME).await; - (collator_id, relay_parent) - }; - state - .collation_fetch_timeouts - .push(timeout(id.clone(), relay_parent.clone()).boxed()); + let PendingCollation { relay_parent, para_id, peer_id, candidate_hash, .. } = pc; if let Some(peer_data) = state.peer_data.get(&peer_id) { - if peer_data.has_advertised(&relay_parent) { - request_collation(sender, state, relay_parent, para_id, peer_id, tx).await; + // If candidate hash is `Some` then relay parent supports prospective + // parachains. + if peer_data.has_advertised(&relay_parent, candidate_hash) { + let timeout = |collator_id, relay_parent| async move { + Delay::new(MAX_UNSHARED_DOWNLOAD_TIME).await; + (collator_id, relay_parent) + }; + state + .collation_fetch_timeouts + .push(timeout(id.clone(), relay_parent.clone()).boxed()); + request_collation( + sender, + state, + relay_parent, + para_id, + candidate_hash, + peer_id, + id.clone(), + tx, + ) + .await; + + state.collation_fetches.push(rx.map(|r| ((id, pc), r)).boxed()); } else { gum::debug!( target: LOG_TARGET, @@ -679,8 +782,6 @@ async fn fetch_collation( "Requested to fetch a collation from an unknown peer", ); } - - state.collation_fetches.push(rx.map(|r| ((id, pc), r)).boxed()); } /// Report a collator for some malicious actions. 
@@ -730,10 +831,15 @@ async fn notify_collation_seconded( async fn handle_peer_view_change(state: &mut State, peer_id: PeerId, view: View) -> Result<()> { let peer_data = state.peer_data.entry(peer_id.clone()).or_default(); - peer_data.update_view(view); + peer_data.update_view( + &state.implicit_view, + &state.active_leaves, + &state.per_relay_parent, + view, + ); state .requested_collations - .retain(|pc, _| pc.peer_id != peer_id || !peer_data.has_advertised(&pc.relay_parent)); + .retain(|pc, _| pc.peer_id != peer_id || !peer_data.has_advertised(&pc.relay_parent, None)); Ok(()) } @@ -749,20 +855,26 @@ async fn request_collation( state: &mut State, relay_parent: Hash, para_id: ParaId, + candidate_hash: Option, peer_id: PeerId, + collator_id: CollatorId, result: oneshot::Sender<(CandidateReceipt, PoV)>, ) { - if !state.view.contains(&relay_parent) { - gum::debug!( - target: LOG_TARGET, - peer_id = %peer_id, - para_id = %para_id, - relay_parent = %relay_parent, - "collation is no longer in view", - ); - return - } - let pending_collation = PendingCollation::new(relay_parent, ¶_id, &peer_id); + let per_relay_parent = match state.per_relay_parent.get_mut(&relay_parent) { + Some(state) => state, + None => { + gum::debug!( + target: LOG_TARGET, + peer_id = %peer_id, + para_id = %para_id, + relay_parent = %relay_parent, + "Collation relay parent is out of view", + ); + return + }, + }; + let relay_parent_mode = per_relay_parent.prospective_parachains_mode; + let pending_collation = PendingCollation::new(relay_parent, ¶_id, &peer_id, candidate_hash); if state.requested_collations.contains_key(&pending_collation) { gum::warn!( target: LOG_TARGET, @@ -774,14 +886,41 @@ async fn request_collation( return } - let (full_request, response_recv) = OutgoingRequest::new( - Recipient::Peer(peer_id), - CollationFetchingRequest { relay_parent, para_id }, - ); - let requests = Requests::CollationFetchingV1(full_request); + let (requests, response_recv) = match (relay_parent_mode, 
candidate_hash) { + (ProspectiveParachainsMode::Disabled, None) => { + let (req, response_recv) = OutgoingRequest::new( + Recipient::Peer(peer_id), + request_v1::CollationFetchingRequest { relay_parent, para_id }, + ); + let requests = Requests::CollationFetchingV1(req); + (requests, response_recv.boxed()) + }, + (ProspectiveParachainsMode::Enabled, Some(candidate_hash)) => { + let (req, response_recv) = OutgoingRequest::new( + Recipient::Peer(peer_id), + request_vstaging::CollationFetchingRequest { + relay_parent, + para_id, + candidate_hash, + }, + ); + let requests = Requests::CollationFetchingVStaging(req); + (requests, response_recv.boxed()) + }, + _ => { + gum::error!( + target: LOG_TARGET, + peer_id = %peer_id, + %para_id, + ?relay_parent, + "Invalid arguments for collation request", + ); + return + }, + }; let per_request = PerRequest { - from_collator: response_recv.boxed().fuse(), + from_collator: response_recv.fuse(), to_requester: result, span: state .span_per_relay_parent @@ -790,9 +929,10 @@ async fn request_collation( _lifetime_timer: state.metrics.time_collation_request_duration(), }; - state - .requested_collations - .insert(PendingCollation::new(relay_parent, ¶_id, &peer_id), per_request); + state.requested_collations.insert( + PendingCollation::new(relay_parent, ¶_id, &peer_id, candidate_hash), + per_request, + ); gum::debug!( target: LOG_TARGET, @@ -802,6 +942,9 @@ async fn request_collation( "Requesting collation", ); + per_relay_parent.collations.status = CollationStatus::Fetching; + per_relay_parent.collations.fetching_from.replace(collator_id); + sender .send_message(NetworkBridgeMessage::SendRequests( vec![requests], @@ -816,12 +959,18 @@ async fn process_incoming_peer_message( ctx: &mut Context, state: &mut State, origin: PeerId, - msg: protocol_v1::CollatorProtocolMessage, + msg: Versioned< + protocol_v1::CollatorProtocolMessage, + protocol_vstaging::CollatorProtocolMessage, + >, ) { - use protocol_v1::CollatorProtocolMessage::*; + use 
protocol_v1::CollatorProtocolMessage as V1; + use protocol_vstaging::CollatorProtocolMessage as VStaging; use sp_runtime::traits::AppVerify; + match msg { - Declare(collator_id, para_id, signature) => { + Versioned::V1(V1::Declare(collator_id, para_id, signature)) | + Versioned::VStaging(VStaging::Declare(collator_id, para_id, signature)) => { if collator_peer_id(&state.peer_data, &collator_id).is_some() { modify_reputation(ctx.sender(), origin, COST_UNEXPECTED_MESSAGE).await; return @@ -863,7 +1012,7 @@ async fn process_incoming_peer_message( return } - if state.active_paras.is_current(¶_id) { + if state.current_assignments.contains_key(¶_id) { gum::debug!( target: LOG_TARGET, peer_id = ?origin, @@ -887,165 +1036,325 @@ async fn process_incoming_peer_message( disconnect_peer(ctx.sender(), origin).await; } }, - AdvertiseCollation(relay_parent) => { - let _span = state - .span_per_relay_parent - .get(&relay_parent) - .map(|s| s.child("advertise-collation")); - if !state.view.contains(&relay_parent) { + Versioned::V1(V1::AdvertiseCollation(relay_parent)) => + handle_advertisement(ctx.sender(), state, relay_parent, &origin, None).await, + Versioned::VStaging(VStaging::AdvertiseCollation { + relay_parent, + candidate_hash, + parent_head_data_hash, + }) => + handle_advertisement( + ctx.sender(), + state, + relay_parent, + &origin, + Some((candidate_hash, parent_head_data_hash)), + ) + .await, + Versioned::V1(V1::CollationSeconded(..)) | + Versioned::VStaging(VStaging::CollationSeconded(..)) => { + gum::warn!( + target: LOG_TARGET, + peer_id = ?origin, + "Unexpected `CollationSeconded` message, decreasing reputation", + ); + + modify_reputation(ctx.sender(), origin, COST_UNEXPECTED_MESSAGE).await; + }, + } +} + +// async fn request_hypothetical_depth( +// sender: &mut Sender, +// relay_parent: Hash, +// candidate_hash: CandidateHash, +// para_id: ParaId, +// ) -> Option> +// where +// Sender: CollatorProtocolSenderTrait, { +// let (tx, rx) = oneshot::channel(); + +// let 
request = HypotheticalDepthRequest { +// candidate_hash, +// candidate_para: todo!(), +// parent_head_data_hash: todo!(), +// candidate_relay_parent: todo!(), +// fragment_tree_relay_parent: todo!(), +// }; +// } + +async fn handle_advertisement( + sender: &mut Sender, + state: &mut State, + relay_parent: Hash, + peer_id: &PeerId, + vstaging_args: Option<(CandidateHash, Hash)>, +) where + Sender: CollatorProtocolSenderTrait, +{ + let _span = state + .span_per_relay_parent + .get(&relay_parent) + .map(|s| s.child("advertise-collation")); + let per_relay_parent = match state.per_relay_parent.get_mut(&relay_parent) { + Some(state) => state, + None => { + gum::debug!( + target: LOG_TARGET, + peer_id = ?peer_id, + ?relay_parent, + "Advertise collation out of view", + ); + + modify_reputation(sender, *peer_id, COST_UNEXPECTED_MESSAGE).await; + return + }, + }; + let relay_parent_mode = per_relay_parent.prospective_parachains_mode; + + let peer_data = match state.peer_data.get_mut(&peer_id) { + None => { + gum::debug!( + target: LOG_TARGET, + peer_id = ?peer_id, + ?relay_parent, + "Advertise collation message has been received from an unknown peer", + ); + modify_reputation(sender, *peer_id, COST_UNEXPECTED_MESSAGE).await; + return + }, + Some(p) => p, + }; + let para_id = if let Some(id) = peer_data.collating_para() { + id + } else { + gum::debug!( + target: LOG_TARGET, + peer_id = ?peer_id, + ?relay_parent, + "Advertise collation message received from undeclared peer", + ); + modify_reputation(sender, *peer_id, COST_UNEXPECTED_MESSAGE).await; + return + }; + + let insert_result = match (relay_parent_mode, vstaging_args) { + (ProspectiveParachainsMode::Disabled, None) => peer_data.insert_advertisement( + relay_parent, + relay_parent_mode, + None, + &state.implicit_view, + &state.active_leaves, + ), + (ProspectiveParachainsMode::Enabled, Some((candidate_hash, parent_head_data_hash))) => { + // TODO [now]: request hypothetical depth and check for backed parent nodes + // 
in a fragment tree. + peer_data.insert_advertisement( + relay_parent, + relay_parent_mode, + Some(candidate_hash), + &state.implicit_view, + &state.active_leaves, + ) + }, + _ => { + gum::warn!( + target: LOG_TARGET, + peer_id = ?peer_id, + ?relay_parent, + "Invalid arguments for advertisement", + ); + return + }, + }; + + match insert_result { + Ok((id, para_id)) => { + gum::debug!( + target: LOG_TARGET, + peer_id = ?peer_id, + %para_id, + ?relay_parent, + "Received advertise collation", + ); + + let maybe_candidate_hash = vstaging_args.map(|(candidate_hash, _)| candidate_hash); + let pending_collation = + PendingCollation::new(relay_parent, ¶_id, peer_id, maybe_candidate_hash); + + let collations = &mut per_relay_parent.collations; + if !collations.is_fetch_allowed(relay_parent_mode, para_id) { gum::debug!( target: LOG_TARGET, - peer_id = ?origin, + peer_id = ?peer_id, + para_id = ?para_id, ?relay_parent, - "Advertise collation out of view", + "Seconded collations limit reached", ); - - modify_reputation(ctx.sender(), origin, COST_UNEXPECTED_MESSAGE).await; return } - let peer_data = match state.peer_data.get_mut(&origin) { - None => { - gum::debug!( + match collations.status { + CollationStatus::Fetching | CollationStatus::WaitingOnValidation => { + gum::trace!( target: LOG_TARGET, - peer_id = ?origin, - ?relay_parent, - "Advertise collation message has been received from an unknown peer", - ); - modify_reputation(ctx.sender(), origin, COST_UNEXPECTED_MESSAGE).await; - return - }, - Some(p) => p, - }; - - match peer_data.insert_advertisement(relay_parent, &state.view) { - Ok((id, para_id)) => { - gum::debug!( - target: LOG_TARGET, - peer_id = ?origin, + peer_id = ?peer_id, %para_id, ?relay_parent, - "Received advertise collation", + "Added collation to the pending list" ); - - let pending_collation = PendingCollation::new(relay_parent, ¶_id, &origin); - - let collations = - state.collations_per_relay_parent.entry(relay_parent).or_default(); - - match 
collations.status { - CollationStatus::Fetching | CollationStatus::WaitingOnValidation => { - gum::trace!( - target: LOG_TARGET, - peer_id = ?origin, - %para_id, - ?relay_parent, - "Added collation to the pending list" - ); - collations.unfetched_collations.push((pending_collation, id)); - }, - CollationStatus::Waiting => { - collations.status = CollationStatus::Fetching; - collations.waiting_collation = Some(id.clone()); - - fetch_collation(ctx.sender(), state, pending_collation.clone(), id) - .await; - }, - CollationStatus::Seconded => { - gum::trace!( - target: LOG_TARGET, - peer_id = ?origin, - %para_id, - ?relay_parent, - "Valid seconded collation" - ); - }, - } + collations.waiting_queue.push_back((pending_collation, id)); }, - Err(error) => { - gum::debug!( + CollationStatus::Waiting => { + fetch_collation(sender, state, pending_collation.clone(), id).await; + }, + CollationStatus::Seconded if relay_parent_mode.is_enabled() => { + // Limit is not reached, it's allowed to second another + // collation. + fetch_collation(sender, state, pending_collation.clone(), id).await; + }, + CollationStatus::Seconded => { + gum::trace!( target: LOG_TARGET, - peer_id = ?origin, + peer_id = ?peer_id, + %para_id, ?relay_parent, - ?error, - "Invalid advertisement", + "A collation has been already seconded", ); - - modify_reputation(ctx.sender(), origin, COST_UNEXPECTED_MESSAGE).await; }, } }, - CollationSeconded(_, _) => { + Err(AdvertisementError::InvalidArguments) => { gum::warn!( target: LOG_TARGET, - peer_id = ?origin, - "Unexpected `CollationSeconded` message, decreasing reputation", + peer_id = ?peer_id, + ?relay_parent, + relay_parent_mode = ?relay_parent_mode, + "Relay parent mode mismatch", ); }, - } -} - -/// A leaf has become inactive so we want to -/// - Cancel all ongoing collation requests that are on top of that leaf. -/// - Remove all stored collations relevant to that leaf. 
-async fn remove_relay_parent(state: &mut State, relay_parent: Hash) -> Result<()> { - state.requested_collations.retain(|k, _| k.relay_parent != relay_parent); - - state.pending_candidates.retain(|k, _| k != &relay_parent); + Err(error) => { + gum::debug!( + target: LOG_TARGET, + peer_id = ?peer_id, + ?relay_parent, + ?error, + "Invalid advertisement", + ); - state.collations_per_relay_parent.remove(&relay_parent); - Ok(()) + modify_reputation(sender, *peer_id, COST_UNEXPECTED_MESSAGE).await; + }, + } } /// Our view has changed. -#[overseer::contextbounds(CollatorProtocol, prefix = self::overseer)] -async fn handle_our_view_change( - ctx: &mut Context, +async fn handle_our_view_change( + sender: &mut Sender, state: &mut State, keystore: &SyncCryptoStorePtr, view: OurView, -) -> Result<()> { - let old_view = std::mem::replace(&mut state.view, view); - - let added: HashMap> = state - .view - .span_per_head() - .iter() - .filter(|v| !old_view.contains(&v.0)) - .map(|v| (v.0.clone(), v.1.clone())) - .collect(); - - added.into_iter().for_each(|(h, s)| { - state.span_per_relay_parent.insert(h, PerLeafSpan::new(s, "validator-side")); - }); - - let added = state.view.difference(&old_view).cloned().collect::>(); - let removed = old_view.difference(&state.view).cloned().collect::>(); - - for removed in removed.iter().cloned() { - remove_relay_parent(state, removed).await?; - state.span_per_relay_parent.remove(&removed); +) -> Result<()> +where + Sender: CollatorProtocolSenderTrait, +{ + let current_leaves = state.active_leaves.clone(); + + let removed = current_leaves.iter().filter(|(h, _)| !view.contains(*h)); + let added = view.iter().filter(|h| !current_leaves.contains_key(h)); + + for leaf in added { + let mode = prospective_parachains_mode(sender, *leaf).await?; + + if let Some(span) = view.span_per_head().get(leaf).cloned() { + let per_leaf_span = PerLeafSpan::new(span, "validator-side"); + state.span_per_relay_parent.insert(*leaf, per_leaf_span); + } + + let mut 
per_relay_parent = PerRelayParent::new(mode); + assign_incoming( + sender, + &mut per_relay_parent.assignment, + &mut state.current_assignments, + keystore, + *leaf, + ) + .await; + + state.active_leaves.insert(*leaf, mode); + state.per_relay_parent.insert(*leaf, per_relay_parent); + + if mode.is_enabled() { + state + .implicit_view + .activate_leaf(sender, *leaf) + .await + .map_err(Error::ImplicitViewFetchError)?; + + let allowed_ancestry = state + .implicit_view + .known_allowed_relay_parents_under(leaf, None) + .unwrap_or_default(); + for block_hash in allowed_ancestry { + if let Entry::Vacant(entry) = state.per_relay_parent.entry(*block_hash) { + let mut per_relay_parent = + PerRelayParent::new(ProspectiveParachainsMode::Enabled); + assign_incoming( + sender, + &mut per_relay_parent.assignment, + &mut state.current_assignments, + keystore, + *block_hash, + ) + .await; + + entry.insert(per_relay_parent); + } + } + } } - state.active_paras.assign_incoming(ctx.sender(), keystore, added).await; - state.active_paras.remove_outgoing(removed); + for (removed, mode) in removed { + state.active_leaves.remove(removed); + // If the leaf is deactivated it still may stay in the view as a part + // of implicit ancestry. Only update the state after the hash is actually + // pruned from the block info storage. 
+ let pruned = if mode.is_enabled() { + state.implicit_view.deactivate_leaf(*removed) + } else { + vec![*removed] + }; + + for removed in pruned { + if let Some(per_relay_parent) = state.per_relay_parent.remove(&removed) { + remove_outgoing(&mut state.current_assignments, per_relay_parent); + } + + state.requested_collations.retain(|k, _| k.relay_parent != removed); + state.fetched_candidates.retain(|k, _| k.relay_parent != removed); + state.span_per_relay_parent.remove(&removed); + } + } for (peer_id, peer_data) in state.peer_data.iter_mut() { - peer_data.prune_old_advertisements(&state.view); + peer_data.prune_old_advertisements( + &state.implicit_view, + &state.active_leaves, + &state.per_relay_parent, + ); // Disconnect peers who are not relevant to our current or next para. // // If the peer hasn't declared yet, they will be disconnected if they do not // declare. if let Some(para_id) = peer_data.collating_para() { - if !state.active_paras.is_current(¶_id) { + if !state.current_assignments.contains_key(¶_id) { gum::trace!( target: LOG_TARGET, ?peer_id, ?para_id, "Disconnecting peer on view change (not current parachain id)" ); - disconnect_peer(ctx.sender(), peer_id.clone()).await; + disconnect_peer(sender, peer_id.clone()).await; } } } @@ -1079,9 +1388,9 @@ async fn handle_network_msg( handle_peer_view_change(state, peer_id, view).await?; }, OurViewChange(view) => { - handle_our_view_change(ctx, state, keystore, view).await?; + handle_our_view_change(ctx.sender(), state, keystore, view).await?; }, - PeerMessage(remote, Versioned::V1(msg)) => { + PeerMessage(remote, msg) => { process_incoming_peer_message(ctx, state, remote, msg).await; }, PeerMessage(_, Versioned::VStaging(_)) => todo!(), @@ -1129,15 +1438,30 @@ async fn process_msg( } }, Seconded(parent, stmt) => { - if let Some(collation_event) = state.pending_candidates.remove(&parent) { + let receipt = match stmt.payload() { + Statement::Seconded(receipt) => receipt, + Statement::Valid(_) => { + // 
Seconded statement expected. + return + }, + }; + let fetched_collation = FetchedCollation::from(&receipt.to_plain()); + if let Some(collation_event) = state.fetched_candidates.remove(&fetched_collation) { let (collator_id, pending_collation) = collation_event; let PendingCollation { relay_parent, peer_id, .. } = pending_collation; - note_good_collation(ctx.sender(), &state.peer_data, collator_id).await; + note_good_collation(ctx.sender(), &state.peer_data, collator_id.clone()).await; notify_collation_seconded(ctx.sender(), peer_id, relay_parent, stmt).await; - if let Some(collations) = state.collations_per_relay_parent.get_mut(&parent) { - collations.status = CollationStatus::Seconded; + if let Some(state) = state.per_relay_parent.get_mut(&parent) { + state.collations.status = CollationStatus::Seconded; + *state + .collations + .seconded_count + .entry(pending_collation.para_id) + .or_insert(0) += 1; } + // If async backing is enabled, make an attempt to fetch next collation. + dequeue_next_collation_and_fetch(ctx, state, parent, collator_id).await; } else { gum::debug!( target: LOG_TARGET, @@ -1147,7 +1471,8 @@ async fn process_msg( } }, Invalid(parent, candidate_receipt) => { - let id = match state.pending_candidates.entry(parent) { + let fetched_collation = FetchedCollation::from(&candidate_receipt); + let id = match state.fetched_candidates.entry(fetched_collation) { Entry::Occupied(entry) if entry.get().1.commitments_hash == Some(candidate_receipt.commitments_hash) => @@ -1289,11 +1614,11 @@ async fn dequeue_next_collation_and_fetch( // The collator we tried to fetch from last. 
previous_fetch: CollatorId, ) { - if let Some((next, id)) = state - .collations_per_relay_parent - .get_mut(&relay_parent) - .and_then(|c| c.get_next_collation_to_fetch(Some(&previous_fetch))) - { + if let Some((next, id)) = state.per_relay_parent.get_mut(&relay_parent).and_then(|state| { + state + .collations + .get_next_collation_to_fetch(Some(&previous_fetch), state.prospective_parachains_mode) + }) { gum::debug!( target: LOG_TARGET, ?relay_parent, @@ -1356,6 +1681,18 @@ async fn handle_collation_fetched_result( // notify the collator of their successful second backing let relay_parent = collation_event.1.relay_parent; + let per_relay_parent = match state.per_relay_parent.get_mut(&relay_parent) { + Some(state) => state, + None => { + gum::trace!( + target: LOG_TARGET, + relay_parent = ?relay_parent, + "Fetched collation for a parent out of view", + ); + return + }, + }; + let (candidate_receipt, pov) = match res { Ok(res) => res, Err(e) => { @@ -1374,19 +1711,13 @@ async fn handle_collation_fetched_result( }, }; - if let Some(collations) = state.collations_per_relay_parent.get_mut(&relay_parent) { - if let CollationStatus::Seconded = collations.status { - gum::debug!( - target: LOG_TARGET, - ?relay_parent, - "Already seconded - no longer interested in collation fetch result." - ); - return - } - collations.status = CollationStatus::WaitingOnValidation; - } + let collations = &mut per_relay_parent.collations; + // There's always a single collation being fetched at any moment of time. + // In case of a failure, we reset the status back to waiting. 
+ collations.status = CollationStatus::WaitingOnValidation; - if let Entry::Vacant(entry) = state.pending_candidates.entry(relay_parent) { + let fetched_collation = FetchedCollation::from(&candidate_receipt); + if let Entry::Vacant(entry) = state.fetched_candidates.entry(fetched_collation) { collation_event.1.commitments_hash = Some(candidate_receipt.commitments_hash); if let Some(pvd) = request_persisted_validation_data( @@ -1534,7 +1865,7 @@ async fn poll_collation_response( ); CollationFetchResult::Error(None) }, - Ok(CollationFetchingResponse::Collation(receipt, _)) + Ok(request_v1::CollationFetchingResponse::Collation(receipt, _)) if receipt.descriptor().para_id != pending_collation.para_id => { gum::debug!( @@ -1547,7 +1878,7 @@ async fn poll_collation_response( CollationFetchResult::Error(Some(COST_WRONG_PARA)) }, - Ok(CollationFetchingResponse::Collation(receipt, pov)) => { + Ok(request_v1::CollationFetchingResponse::Collation(receipt, pov)) => { gum::debug!( target: LOG_TARGET, para_id = %pending_collation.para_id, From 9fee11ea8be39135c802e6cef9f225bdd1502331 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Fri, 5 Aug 2022 17:17:42 +0300 Subject: [PATCH 13/45] Add more checks for advertisement --- .../src/validator_side/mod.rs | 83 ++++++++++++++----- 1 file changed, 61 insertions(+), 22 deletions(-) diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index 2c15fb641108..2ee4106f814a 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -55,8 +55,8 @@ use polkadot_node_subsystem_util::{ backing_implicit_view::View as ImplicitView, metrics::prometheus::prometheus::HistogramTimer, }; use polkadot_primitives::v2::{ - CandidateHash, CandidateReceipt, CollatorId, Hash, Id as ParaId, OccupiedCoreAssumption, - PersistedValidationData, + CandidateHash, CandidateReceipt, CollatorId, CoreState, Hash, Id as 
ParaId, + OccupiedCoreAssumption, PersistedValidationData, }; use crate::error::{Error, Result}; @@ -275,7 +275,7 @@ impl PeerData { } state.last_active = Instant::now(); - Ok((state.collator_id, state.para_id)) + Ok((state.collator_id.clone(), state.para_id)) }, } } @@ -512,9 +512,21 @@ impl Collations { } } +#[derive(Debug, Copy, Clone)] +enum AssignedCoreState { + Scheduled, + Occupied, +} + +impl AssignedCoreState { + fn is_occupied(&self) -> bool { + matches!(self, AssignedCoreState::Occupied) + } +} + #[derive(Debug, Copy, Clone)] struct GroupAssignments { - current: Option, + current: Option<(ParaId, AssignedCoreState)>, } struct PerRelayParent { @@ -658,7 +670,11 @@ async fn assign_incoming( Some(group) => { let core_now = rotation_info.core_for_group(group, cores.len()); - cores.get(core_now.0 as usize).and_then(|c| c.para_id()) + cores.get(core_now.0 as usize).and_then(|c| match c { + CoreState::Occupied(core) => Some((core.para_id(), AssignedCoreState::Occupied)), + CoreState::Scheduled(core) => Some((core.para_id, AssignedCoreState::Scheduled)), + CoreState::Free => None, + }) }, None => { gum::trace!(target: LOG_TARGET, ?relay_parent, "Not a validator"); @@ -675,14 +691,14 @@ async fn assign_incoming( // // However, this'll work fine for parachains, as each parachain gets a dedicated // core. 
- if let Some(para_now) = para_now { - let entry = current_assignments.entry(para_now).or_default(); + if let Some((para_id, _)) = para_now.as_ref() { + let entry = current_assignments.entry(*para_id).or_default(); *entry += 1; if *entry == 1 { gum::debug!( target: LOG_TARGET, ?relay_parent, - para_id = ?para_now, + para_id = ?para_id, "Assigned to a parachain", ); } @@ -697,7 +713,7 @@ fn remove_outgoing( ) { let GroupAssignments { current } = per_relay_parent.assignment; - if let Some(cur) = current { + if let Some((cur, _)) = current { if let Entry::Occupied(mut occupied) = current_assignments.entry(cur) { *occupied.get_mut() -= 1; if *occupied.get() == 0 { @@ -1096,6 +1112,14 @@ async fn handle_advertisement( .span_per_relay_parent .get(&relay_parent) .map(|s| s.child("advertise-collation")); + + // First, perform validity checks: + // - Relay parent is known + // - Peer is declared + // - Para id is indeed the one we're assigned to at the given relay parent + // - Collator is not trying to build on top of occupied core (unless async + // backing is enabled) + let per_relay_parent = match state.per_relay_parent.get_mut(&relay_parent) { Some(state) => state, None => { @@ -1111,6 +1135,7 @@ async fn handle_advertisement( }, }; let relay_parent_mode = per_relay_parent.prospective_parachains_mode; + let assignment = per_relay_parent.assignment; let peer_data = match state.peer_data.get_mut(&peer_id) { None => { @@ -1125,6 +1150,7 @@ async fn handle_advertisement( }, Some(p) => p, }; + let para_id = if let Some(id) = peer_data.collating_para() { id } else { @@ -1138,6 +1164,31 @@ async fn handle_advertisement( return }; + let core_state = match assignment.current { + Some((id, core_state)) if id == para_id => core_state, + _ => { + gum::debug!( + target: LOG_TARGET, + peer_id = ?peer_id, + ?relay_parent, + "Advertise collation message for relay parent we're not assigned to", + ); + modify_reputation(sender, *peer_id, COST_UNEXPECTED_MESSAGE).await; + return + }, + 
}; + + if !relay_parent_mode.is_enabled() && core_state.is_occupied() { + gum::debug!( + target: LOG_TARGET, + peer_id = ?peer_id, + ?relay_parent, + "Advertise collation message for an occupied core (async backing disabled)", + ); + modify_reputation(sender, *peer_id, COST_UNEXPECTED_MESSAGE).await; + return + } + let insert_result = match (relay_parent_mode, vstaging_args) { (ProspectiveParachainsMode::Disabled, None) => peer_data.insert_advertisement( relay_parent, @@ -1393,7 +1444,6 @@ async fn handle_network_msg( PeerMessage(remote, msg) => { process_incoming_peer_message(ctx, state, remote, msg).await; }, - PeerMessage(_, Versioned::VStaging(_)) => todo!(), } Ok(()) @@ -1642,18 +1692,7 @@ async fn request_persisted_validation_data( relay_parent: Hash, para_id: ParaId, ) -> Option { - // TODO [https://github.com/paritytech/polkadot/issues/5054] - // - // As of https://github.com/paritytech/polkadot/pull/5557 the - // `Second` message requires the `PersistedValidationData` to be - // supplied. - // - // Without asynchronous backing, this can be easily fetched from the - // chain state. - // - // This assumes the core is _scheduled_, in keeping with the effective - // current behavior. If the core is occupied, we simply don't return - // anything. Likewise with runtime API errors, which are rare. + // The core is guaranteed to be scheduled since we accepted the advertisement. 
let res = polkadot_node_subsystem_util::request_persisted_validation_data( relay_parent, para_id, From a72212fe5424fbd38d748e61a43ff21630364d43 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Wed, 10 Aug 2022 16:05:33 +0300 Subject: [PATCH 14/45] Request pvd based on async backing mode --- node/network/collator-protocol/src/error.rs | 6 + .../src/validator_side/collation.rs | 199 ++++++++++ .../src/validator_side/mod.rs | 344 +++++++----------- 3 files changed, 330 insertions(+), 219 deletions(-) create mode 100644 node/network/collator-protocol/src/validator_side/collation.rs diff --git a/node/network/collator-protocol/src/error.rs b/node/network/collator-protocol/src/error.rs index 233f747bbcaf..d69f8838b9e6 100644 --- a/node/network/collator-protocol/src/error.rs +++ b/node/network/collator-protocol/src/error.rs @@ -58,6 +58,12 @@ pub enum Error { #[error("Response receiver for Runtime API version request cancelled")] CancelledRuntimeApiVersion(oneshot::Canceled), + #[error("Response receiver for persisted validation data request cancelled")] + CancelledRuntimePersistedValidationData(oneshot::Canceled), + + #[error("Response receiver for prospective validation data request cancelled")] + CancelledProspectiveValidationData(oneshot::Canceled), + #[error("CollationSeconded contained statement with invalid signature")] InvalidStatementSignature(UncheckedSignedFullStatement), } diff --git a/node/network/collator-protocol/src/validator_side/collation.rs b/node/network/collator-protocol/src/validator_side/collation.rs new file mode 100644 index 000000000000..33c3b0785fd1 --- /dev/null +++ b/node/network/collator-protocol/src/validator_side/collation.rs @@ -0,0 +1,199 @@ +// Copyright 2017-2022 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. 
+ +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +use futures::channel::oneshot; +use std::collections::{HashMap, VecDeque}; + +use polkadot_node_network_protocol::PeerId; +use polkadot_node_primitives::PoV; +use polkadot_primitives::v2::{CandidateHash, CandidateReceipt, CollatorId, Hash, Id as ParaId}; + +use crate::{ProspectiveParachainsMode, LOG_TARGET, MAX_CANDIDATE_DEPTH}; + +/// Candidate hash paired with the parent head hash. +#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)] +pub struct ProspectiveCandidate(pub Option<(CandidateHash, Hash)>); + +impl ProspectiveCandidate { + pub fn candidate_hash(&self) -> Option { + self.0.as_ref().map(|c| c.0) + } +} + +/// Identifier of a fetched collation. 
+#[derive(Debug, Clone, Hash, Eq, PartialEq)] +pub struct FetchedCollation { + pub relay_parent: Hash, + pub para_id: ParaId, + pub candidate_hash: CandidateHash, + pub collator_id: CollatorId, +} + +impl From<&CandidateReceipt> for FetchedCollation { + fn from(receipt: &CandidateReceipt) -> Self { + let descriptor = receipt.descriptor(); + Self { + relay_parent: descriptor.relay_parent, + para_id: descriptor.para_id, + candidate_hash: receipt.hash(), + collator_id: descriptor.collator.clone(), + } + } +} + +#[derive(Debug, Clone, Hash, Eq, PartialEq)] +pub struct PendingCollation { + pub relay_parent: Hash, + pub para_id: ParaId, + pub peer_id: PeerId, + pub prospective_candidate: ProspectiveCandidate, + pub commitments_hash: Option, +} + +impl PendingCollation { + pub fn new( + relay_parent: Hash, + para_id: ParaId, + peer_id: &PeerId, + prospective_candidate: ProspectiveCandidate, + ) -> Self { + Self { + relay_parent, + para_id, + peer_id: peer_id.clone(), + prospective_candidate, + commitments_hash: None, + } + } +} + +pub type CollationEvent = (CollatorId, PendingCollation); + +pub type PendingCollationFetch = + (CollationEvent, std::result::Result<(CandidateReceipt, PoV), oneshot::Canceled>); + +/// The status of the collations in [`CollationsPerRelayParent`]. +#[derive(Debug, Clone, Copy)] +pub enum CollationStatus { + /// We are waiting for a collation to be advertised to us. + Waiting, + /// We are currently fetching a collation. + Fetching, + /// We are waiting that a collation is being validated. + WaitingOnValidation, + /// We have seconded a collation. + Seconded, +} + +impl Default for CollationStatus { + fn default() -> Self { + Self::Waiting + } +} + +impl CollationStatus { + /// Downgrades to `Waiting`, but only if `self != Seconded`. 
+ fn back_to_waiting(&mut self, relay_parent_mode: ProspectiveParachainsMode) { + match self { + Self::Seconded => + if relay_parent_mode.is_enabled() { + // With async backing enabled it's allowed to + // second more candidates. + *self = Self::Waiting + }, + _ => *self = Self::Waiting, + } + } +} + +/// Information about collations per relay parent. +#[derive(Default)] +pub struct Collations { + /// What is the current status in regards to a collation for this relay parent? + pub status: CollationStatus, + /// Collator we're fetching from. + /// + /// This is the currently last started fetch, which did not exceed `MAX_UNSHARED_DOWNLOAD_TIME` + /// yet. + pub fetching_from: Option, + /// Collation that were advertised to us, but we did not yet fetch. + pub waiting_queue: VecDeque<(PendingCollation, CollatorId)>, + /// How many collations have been seconded per parachain. + /// Only used when async backing is enabled. + pub seconded_count: HashMap, +} + +impl Collations { + /// Note a seconded collation for a given para. + pub(super) fn note_seconded(&mut self, para_id: ParaId) { + *self.seconded_count.entry(para_id).or_insert(0) += 1 + } + + /// Returns the next collation to fetch from the `unfetched_collations`. + /// + /// This will reset the status back to `Waiting` using [`CollationStatus::back_to_waiting`]. + /// + /// Returns `Some(_)` if there is any collation to fetch, the `status` is not `Seconded` and + /// the passed in `finished_one` is the currently `waiting_collation`. + pub(super) fn get_next_collation_to_fetch( + &mut self, + finished_one: Option<&CollatorId>, + relay_parent_mode: ProspectiveParachainsMode, + ) -> Option<(PendingCollation, CollatorId)> { + // If finished one does not match waiting_collation, then we already dequeued another fetch + // to replace it. 
+ if self.fetching_from.as_ref() != finished_one { + gum::trace!( + target: LOG_TARGET, + waiting_collation = ?self.fetching_from, + ?finished_one, + "Not proceeding to the next collation - has already been done." + ); + return None + } + self.status.back_to_waiting(relay_parent_mode); + + match self.status { + // We don't need to fetch any other collation when we already have seconded one. + CollationStatus::Seconded => None, + CollationStatus::Waiting => { + while let Some(next) = self.waiting_queue.pop_front() { + let para_id = next.0.para_id; + if !self.is_fetch_allowed(relay_parent_mode, para_id) { + continue + } + + return Some(next) + } + + None + }, + CollationStatus::WaitingOnValidation | CollationStatus::Fetching => + unreachable!("We have reset the status above!"), + } + } + + /// Checks the limit of seconded candidates for a given para. + pub(super) fn is_fetch_allowed( + &self, + relay_parent_mode: ProspectiveParachainsMode, + para_id: ParaId, + ) -> bool { + let seconded_limit = + if relay_parent_mode.is_enabled() { MAX_CANDIDATE_DEPTH + 1 } else { 1 }; + self.seconded_count.get(¶_id).map_or(true, |&num| num < seconded_limit) + } +} diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index 2ee4106f814a..e150a988af16 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -24,7 +24,7 @@ use futures::{ }; use futures_timer::Delay; use std::{ - collections::{hash_map::Entry, HashMap, HashSet, VecDeque}, + collections::{hash_map::Entry, HashMap, HashSet}, task::Poll, time::{Duration, Instant}, }; @@ -47,7 +47,7 @@ use polkadot_node_subsystem::{ jaeger, messages::{ CandidateBackingMessage, CollatorProtocolMessage, IfDisconnected, NetworkBridgeEvent, - NetworkBridgeMessage, + NetworkBridgeMessage, ProspectiveParachainsMessage, ProspectiveValidationDataRequest, }, overseer, CollatorProtocolSenderTrait, 
FromOrchestra, OverseerSignal, PerLeafSpan, }; @@ -66,8 +66,14 @@ use super::{ MAX_CANDIDATE_DEPTH, }; +mod collation; mod metrics; +use collation::{ + CollationEvent, CollationStatus, Collations, FetchedCollation, PendingCollation, + PendingCollationFetch, ProspectiveCandidate, +}; + #[cfg(test)] mod tests; @@ -352,166 +358,6 @@ impl Default for PeerData { } } -/// Identifier of a fetched collation. -#[derive(Debug, Clone, Hash, Eq, PartialEq)] -struct FetchedCollation { - relay_parent: Hash, - para_id: ParaId, - candidate_hash: CandidateHash, - collator_id: CollatorId, -} - -impl From<&CandidateReceipt> for FetchedCollation { - fn from(receipt: &CandidateReceipt) -> Self { - let descriptor = receipt.descriptor(); - Self { - relay_parent: descriptor.relay_parent, - para_id: descriptor.para_id, - candidate_hash: receipt.hash(), - collator_id: descriptor.collator.clone(), - } - } -} - -#[derive(Debug, Clone, Hash, Eq, PartialEq)] -struct PendingCollation { - relay_parent: Hash, - para_id: ParaId, - peer_id: PeerId, - candidate_hash: Option, - commitments_hash: Option, -} - -impl PendingCollation { - fn new( - relay_parent: Hash, - para_id: &ParaId, - peer_id: &PeerId, - candidate_hash: Option, - ) -> Self { - Self { - relay_parent, - para_id: para_id.clone(), - peer_id: peer_id.clone(), - candidate_hash, - commitments_hash: None, - } - } -} - -type CollationEvent = (CollatorId, PendingCollation); - -type PendingCollationFetch = - (CollationEvent, std::result::Result<(CandidateReceipt, PoV), oneshot::Canceled>); - -/// The status of the collations in [`CollationsPerRelayParent`]. -#[derive(Debug, Clone, Copy)] -enum CollationStatus { - /// We are waiting for a collation to be advertised to us. - Waiting, - /// We are currently fetching a collation. - Fetching, - /// We are waiting that a collation is being validated. - WaitingOnValidation, - /// We have seconded a collation. 
- Seconded, -} - -impl Default for CollationStatus { - fn default() -> Self { - Self::Waiting - } -} - -impl CollationStatus { - /// Downgrades to `Waiting`, but only if `self != Seconded`. - fn back_to_waiting(&mut self, relay_parent_mode: ProspectiveParachainsMode) { - match self { - Self::Seconded => - if relay_parent_mode.is_enabled() { - // With async backing enabled it's allowed to - // second more candidates. - *self = Self::Waiting - }, - _ => *self = Self::Waiting, - } - } -} - -/// Information about collations per relay parent. -#[derive(Default)] -struct Collations { - /// What is the current status in regards to a collation for this relay parent? - status: CollationStatus, - /// Collator we're fetching from. - /// - /// This is the currently last started fetch, which did not exceed `MAX_UNSHARED_DOWNLOAD_TIME` - /// yet. - fetching_from: Option, - /// Collation that were advertised to us, but we did not yet fetch. - waiting_queue: VecDeque<(PendingCollation, CollatorId)>, - /// How many collations have been seconded per parachain. - /// Only used when async backing is enabled. - seconded_count: HashMap, -} - -impl Collations { - /// Returns the next collation to fetch from the `unfetched_collations`. - /// - /// This will reset the status back to `Waiting` using [`CollationStatus::back_to_waiting`]. - /// - /// Returns `Some(_)` if there is any collation to fetch, the `status` is not `Seconded` and - /// the passed in `finished_one` is the currently `waiting_collation`. - fn get_next_collation_to_fetch( - &mut self, - finished_one: Option<&CollatorId>, - relay_parent_mode: ProspectiveParachainsMode, - ) -> Option<(PendingCollation, CollatorId)> { - // If finished one does not match waiting_collation, then we already dequeued another fetch - // to replace it. 
- if self.fetching_from.as_ref() != finished_one { - gum::trace!( - target: LOG_TARGET, - waiting_collation = ?self.fetching_from, - ?finished_one, - "Not proceeding to the next collation - has already been done." - ); - return None - } - self.status.back_to_waiting(relay_parent_mode); - - match self.status { - // We don't need to fetch any other collation when we already have seconded one. - CollationStatus::Seconded => None, - CollationStatus::Waiting => { - while let Some(next) = self.waiting_queue.pop_front() { - let para_id = next.0.para_id; - if !self.is_fetch_allowed(relay_parent_mode, para_id) { - continue - } - - return Some(next) - } - - None - }, - CollationStatus::WaitingOnValidation | CollationStatus::Fetching => - unreachable!("We have reset the status above!"), - } - } - - /// Checks the limit of seconded candidates for a given para. - fn is_fetch_allowed( - &self, - relay_parent_mode: ProspectiveParachainsMode, - para_id: ParaId, - ) -> bool { - let seconded_limit = - if relay_parent_mode.is_enabled() { MAX_CANDIDATE_DEPTH + 1 } else { 1 }; - self.seconded_count.get(¶_id).map_or(true, |&num| num < seconded_limit) - } -} - #[derive(Debug, Copy, Clone)] enum AssignedCoreState { Scheduled, @@ -564,7 +410,7 @@ struct State { /// to asynchronous backing is done. active_leaves: HashMap, - /// State tracked + /// State tracked per relay parent. per_relay_parent: HashMap, /// Track all active collators and their data. @@ -754,7 +600,8 @@ async fn fetch_collation( ) { let (tx, rx) = oneshot::channel(); - let PendingCollation { relay_parent, para_id, peer_id, candidate_hash, .. } = pc; + let PendingCollation { relay_parent, para_id, peer_id, prospective_candidate, .. 
} = pc; + let candidate_hash = prospective_candidate.candidate_hash(); if let Some(peer_data) = state.peer_data.get(&peer_id) { // If candidate hash is `Some` then relay parent supports prospective @@ -772,7 +619,7 @@ async fn fetch_collation( state, relay_parent, para_id, - candidate_hash, + prospective_candidate, peer_id, id.clone(), tx, @@ -871,7 +718,7 @@ async fn request_collation( state: &mut State, relay_parent: Hash, para_id: ParaId, - candidate_hash: Option, + prospective_candidate: ProspectiveCandidate, peer_id: PeerId, collator_id: CollatorId, result: oneshot::Sender<(CandidateReceipt, PoV)>, @@ -890,7 +737,8 @@ async fn request_collation( }, }; let relay_parent_mode = per_relay_parent.prospective_parachains_mode; - let pending_collation = PendingCollation::new(relay_parent, ¶_id, &peer_id, candidate_hash); + let pending_collation = + PendingCollation::new(relay_parent, para_id, &peer_id, prospective_candidate); if state.requested_collations.contains_key(&pending_collation) { gum::warn!( target: LOG_TARGET, @@ -902,7 +750,7 @@ async fn request_collation( return } - let (requests, response_recv) = match (relay_parent_mode, candidate_hash) { + let (requests, response_recv) = match (relay_parent_mode, prospective_candidate.0) { (ProspectiveParachainsMode::Disabled, None) => { let (req, response_recv) = OutgoingRequest::new( Recipient::Peer(peer_id), @@ -911,7 +759,7 @@ async fn request_collation( let requests = Requests::CollationFetchingV1(req); (requests, response_recv.boxed()) }, - (ProspectiveParachainsMode::Enabled, Some(candidate_hash)) => { + (ProspectiveParachainsMode::Enabled, Some((candidate_hash, _))) => { let (req, response_recv) = OutgoingRequest::new( Recipient::Peer(peer_id), request_vstaging::CollationFetchingRequest { @@ -946,7 +794,7 @@ async fn request_collation( }; state.requested_collations.insert( - PendingCollation::new(relay_parent, ¶_id, &peer_id, candidate_hash), + PendingCollation::new(relay_parent, para_id, &peer_id, 
prospective_candidate), per_request, ); @@ -1104,7 +952,7 @@ async fn handle_advertisement( state: &mut State, relay_parent: Hash, peer_id: &PeerId, - vstaging_args: Option<(CandidateHash, Hash)>, + prospective_candidate: Option<(CandidateHash, Hash)>, ) where Sender: CollatorProtocolSenderTrait, { @@ -1170,8 +1018,9 @@ async fn handle_advertisement( gum::debug!( target: LOG_TARGET, peer_id = ?peer_id, + para_id = ?para_id, ?relay_parent, - "Advertise collation message for relay parent we're not assigned to", + "Advertise collation message for para we're no assigned to", ); modify_reputation(sender, *peer_id, COST_UNEXPECTED_MESSAGE).await; return @@ -1189,7 +1038,7 @@ async fn handle_advertisement( return } - let insert_result = match (relay_parent_mode, vstaging_args) { + let insert_result = match (relay_parent_mode, prospective_candidate) { (ProspectiveParachainsMode::Disabled, None) => peer_data.insert_advertisement( relay_parent, relay_parent_mode, @@ -1209,10 +1058,11 @@ async fn handle_advertisement( ) }, _ => { - gum::warn!( + gum::error!( target: LOG_TARGET, peer_id = ?peer_id, ?relay_parent, + relay_parent_mode = ?relay_parent_mode, "Invalid arguments for advertisement", ); return @@ -1229,9 +1079,12 @@ async fn handle_advertisement( "Received advertise collation", ); - let maybe_candidate_hash = vstaging_args.map(|(candidate_hash, _)| candidate_hash); - let pending_collation = - PendingCollation::new(relay_parent, ¶_id, peer_id, maybe_candidate_hash); + let pending_collation = PendingCollation::new( + relay_parent, + para_id, + peer_id, + ProspectiveCandidate(prospective_candidate), + ); let collations = &mut per_relay_parent.collations; if !collations.is_fetch_allowed(relay_parent_mode, para_id) { @@ -1270,7 +1123,7 @@ async fn handle_advertisement( peer_id = ?peer_id, %para_id, ?relay_parent, - "A collation has been already seconded", + "A collation has already been seconded", ); }, } @@ -1504,11 +1357,7 @@ async fn process_msg( if let Some(state) = 
state.per_relay_parent.get_mut(&parent) { state.collations.status = CollationStatus::Seconded; - *state - .collations - .seconded_count - .entry(pending_collation.para_id) - .or_insert(0) += 1; + state.collations.note_seconded(pending_collation.para_id); } // If async backing is enabled, make an attempt to fetch next collation. dequeue_next_collation_and_fetch(ctx, state, parent, collator_id).await; @@ -1686,26 +1535,53 @@ async fn dequeue_next_collation_and_fetch( } } -#[overseer::contextbounds(CollatorProtocol, prefix = self::overseer)] -async fn request_persisted_validation_data( - ctx: &mut Context, +async fn request_persisted_validation_data( + sender: &mut Sender, relay_parent: Hash, para_id: ParaId, -) -> Option { +) -> Result> +where + Sender: CollatorProtocolSenderTrait, +{ // The core is guaranteed to be scheduled since we accepted the advertisement. - let res = polkadot_node_subsystem_util::request_persisted_validation_data( + polkadot_node_subsystem_util::request_persisted_validation_data( relay_parent, para_id, OccupiedCoreAssumption::Free, - ctx.sender(), + sender, ) .await - .await; + .await + .map_err(Error::CancelledRuntimePersistedValidationData)? + .map_err(Error::RuntimeApi) +} - match res { - Ok(Ok(Some(pvd))) => Some(pvd), - _ => None, - } +async fn request_prospective_validation_data( + sender: &mut Sender, + candidate_relay_parent: Hash, + parent_head_data_hash: Hash, + para_id: ParaId, +) -> Result> +where + Sender: CollatorProtocolSenderTrait, +{ + let (tx, rx) = oneshot::channel(); + + let request = ProspectiveValidationDataRequest { + para_id, + candidate_relay_parent, + parent_head_data_hash, + // TODO [now]: max pov size should be from runtime + // configuration at candidate relay parent. + // Where do we fetch it from? 
+ max_pov_size: todo!(), + }; + + sender + .send_message(ProspectiveParachainsMessage::GetProspectiveValidationData(request, tx)) + .await; + + rx.await.map_err(Error::CancelledProspectiveValidationData) } /// Handle a fetched collation result. @@ -1719,6 +1595,7 @@ async fn handle_collation_fetched_result( // memorize the `collation_event` for that `relay_parent`, such that we may // notify the collator of their successful second backing let relay_parent = collation_event.1.relay_parent; + let para_id = collation_event.1.para_id; let per_relay_parent = match state.per_relay_parent.get_mut(&relay_parent) { Some(state) => state, @@ -1731,6 +1608,7 @@ async fn handle_collation_fetched_result( return }, }; + let relay_parent_mode = per_relay_parent.prospective_parachains_mode; let (candidate_receipt, pov) = match res { Ok(res) => res, @@ -1738,7 +1616,7 @@ async fn handle_collation_fetched_result( gum::debug!( target: LOG_TARGET, relay_parent = ?collation_event.1.relay_parent, - para_id = ?collation_event.1.para_id, + para_id = ?para_id, peer_id = ?collation_event.1.peer_id, collator_id = ?collation_event.0, error = ?e, @@ -1759,30 +1637,58 @@ async fn handle_collation_fetched_result( if let Entry::Vacant(entry) = state.fetched_candidates.entry(fetched_collation) { collation_event.1.commitments_hash = Some(candidate_receipt.commitments_hash); - if let Some(pvd) = request_persisted_validation_data( - ctx, - candidate_receipt.descriptor().relay_parent, - candidate_receipt.descriptor().para_id, - ) - .await - { - // TODO [https://github.com/paritytech/polkadot/issues/5054] - // - // If PVD isn't available (core occupied) then we'll silently - // just not second this. But prior to asynchronous backing - // we wouldn't second anyway because the core is occupied. - // - // The proper refactoring would be to accept declares from collators - // but not even fetch from them if the core is occupied. Given 5054, - // there's no reason to do this right now. 
- ctx.send_message(CandidateBackingMessage::Second( - relay_parent.clone(), - candidate_receipt, - pvd, - pov, - )) - .await; - } + let result = match collation_event.1.prospective_candidate.0 { + Some((_, parent_head_data_hash)) => + request_prospective_validation_data( + ctx.sender(), + relay_parent, + parent_head_data_hash, + para_id, + ) + .await, + None => + request_persisted_validation_data( + ctx.sender(), + candidate_receipt.descriptor().relay_parent, + candidate_receipt.descriptor().para_id, + ) + .await, + }; + + let pvd = match result { + Ok(Some(pvd)) => pvd, + Ok(None) => { + gum::warn!( + target: LOG_TARGET, + ?relay_parent, + ?para_id, + ?relay_parent_mode, + candidate = ?candidate_receipt.hash(), + "Persisted validation data isn't available", + ); + return + }, + Err(err) => { + gum::warn!( + target: LOG_TARGET, + ?relay_parent, + ?para_id, + ?relay_parent_mode, + candidate = ?candidate_receipt.hash(), + "Failed to fetch persisted validation data due to an error: {}", + err + ); + return + }, + }; + + ctx.send_message(CandidateBackingMessage::Second( + relay_parent, + candidate_receipt, + pvd, + pov, + )) + .await; entry.insert(collation_event); } else { From 0d9bc24bb8f42c6a52a79a9190cb45c9d2bb148f Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Fri, 12 Aug 2022 16:54:20 +0300 Subject: [PATCH 15/45] review --- .../src/collator_side/mod.rs | 7 +- node/network/collator-protocol/src/error.rs | 9 +++ .../src/validator_side/collation.rs | 15 ++-- .../src/validator_side/mod.rs | 79 ++++++++----------- 4 files changed, 56 insertions(+), 54 deletions(-) diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index 0eb225a493c6..282e2dc380ce 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -936,7 +936,12 @@ async fn handle_peer_view_change( .known_allowed_relay_parents_under(&added, state.collating_on) 
.unwrap_or_default(), None => { - // Added leaf is unknown. + gum::trace!( + target: LOG_TARGET, + ?peer_id, + new_leaf = ?added, + "New leaf in peer's view is unknown", + ); continue }, }; diff --git a/node/network/collator-protocol/src/error.rs b/node/network/collator-protocol/src/error.rs index d69f8838b9e6..3beb5e057d91 100644 --- a/node/network/collator-protocol/src/error.rs +++ b/node/network/collator-protocol/src/error.rs @@ -64,6 +64,15 @@ pub enum Error { #[error("Response receiver for prospective validation data request cancelled")] CancelledProspectiveValidationData(oneshot::Canceled), + #[error("Response receiver for active validators request cancelled")] + CancelledActiveValidators(oneshot::Canceled), + + #[error("Response receiver for validator groups request cancelled")] + CancelledValidatorGroups(oneshot::Canceled), + + #[error("Response receiver for availability cores request cancelled")] + CancelledAvailabilityCores(oneshot::Canceled), + #[error("CollationSeconded contained statement with invalid signature")] InvalidStatementSignature(UncheckedSignedFullStatement), } diff --git a/node/network/collator-protocol/src/validator_side/collation.rs b/node/network/collator-protocol/src/validator_side/collation.rs index 33c3b0785fd1..7ae15b26b646 100644 --- a/node/network/collator-protocol/src/validator_side/collation.rs +++ b/node/network/collator-protocol/src/validator_side/collation.rs @@ -23,13 +23,16 @@ use polkadot_primitives::v2::{CandidateHash, CandidateReceipt, CollatorId, Hash, use crate::{ProspectiveParachainsMode, LOG_TARGET, MAX_CANDIDATE_DEPTH}; -/// Candidate hash paired with the parent head hash. +/// Candidate supplied with a para head it's built on top of. 
#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)] -pub struct ProspectiveCandidate(pub Option<(CandidateHash, Hash)>); +pub struct ProspectiveCandidate { + pub candidate_hash: CandidateHash, + pub parent_head_data_hash: Hash, +} impl ProspectiveCandidate { - pub fn candidate_hash(&self) -> Option { - self.0.as_ref().map(|c| c.0) + pub fn candidate_hash(&self) -> CandidateHash { + self.candidate_hash } } @@ -59,7 +62,7 @@ pub struct PendingCollation { pub relay_parent: Hash, pub para_id: ParaId, pub peer_id: PeerId, - pub prospective_candidate: ProspectiveCandidate, + pub prospective_candidate: Option, pub commitments_hash: Option, } @@ -68,7 +71,7 @@ impl PendingCollation { relay_parent: Hash, para_id: ParaId, peer_id: &PeerId, - prospective_candidate: ProspectiveCandidate, + prospective_candidate: Option, ) -> Self { Self { relay_parent, diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index e150a988af16..63c78ac9a87e 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -190,7 +190,7 @@ impl PeerData { .get(removed) .map_or(false, |s| s.prospective_parachains_mode.is_enabled()); let keep = relay_parent_mode_enabled && - is_relay_parent_in_view( + is_relay_parent_in_implicit_view( removed, ProspectiveParachainsMode::Enabled, implicit_view, @@ -219,7 +219,7 @@ impl PeerData { // - It belongs to allowed ancestry under some leaf // Discard otherwise. 
per_relay_parent.get(hash).map_or(false, |s| { - is_relay_parent_in_view( + is_relay_parent_in_implicit_view( hash, s.prospective_parachains_mode, implicit_view, @@ -245,7 +245,7 @@ impl PeerData { match self.state { PeerState::Connected(_) => Err(AdvertisementError::UndeclaredCollator), PeerState::Collating(ref mut state) => { - if !is_relay_parent_in_view( + if !is_relay_parent_in_implicit_view( &on_relay_parent, relay_parent_mode, implicit_view, @@ -447,7 +447,7 @@ struct State { fetched_candidates: HashMap, } -fn is_relay_parent_in_view( +fn is_relay_parent_in_implicit_view( relay_parent: &Hash, relay_parent_mode: ProspectiveParachainsMode, implicit_view: &ImplicitView, @@ -472,42 +472,25 @@ async fn assign_incoming( current_assignments: &mut HashMap, keystore: &SyncCryptoStorePtr, relay_parent: Hash, -) where +) -> Result<()> +where Sender: CollatorProtocolSenderTrait, { - let mv = polkadot_node_subsystem_util::request_validators(relay_parent, sender) + let validators = polkadot_node_subsystem_util::request_validators(relay_parent, sender) .await .await - .ok() - .map(|x| x.ok()) - .flatten(); + .map_err(Error::CancelledActiveValidators)??; - let mg = polkadot_node_subsystem_util::request_validator_groups(relay_parent, sender) - .await - .await - .ok() - .map(|x| x.ok()) - .flatten(); + let (groups, rotation_info) = + polkadot_node_subsystem_util::request_validator_groups(relay_parent, sender) + .await + .await + .map_err(Error::CancelledValidatorGroups)??; - let mc = polkadot_node_subsystem_util::request_availability_cores(relay_parent, sender) + let cores = polkadot_node_subsystem_util::request_availability_cores(relay_parent, sender) .await .await - .ok() - .map(|x| x.ok()) - .flatten(); - - let (validators, groups, rotation_info, cores) = match (mv, mg, mc) { - (Some(v), Some((g, r)), Some(c)) => (v, g, r, c), - _ => { - gum::debug!( - target: LOG_TARGET, - ?relay_parent, - "Failed to query runtime API for relay-parent", - ); - - return - }, - }; + 
.map_err(Error::CancelledAvailabilityCores)??; let para_now = match polkadot_node_subsystem_util::signing_key_and_index(&validators, keystore) .await @@ -525,7 +508,7 @@ async fn assign_incoming( None => { gum::trace!(target: LOG_TARGET, ?relay_parent, "Not a validator"); - return + return Ok(()) }, }; @@ -551,6 +534,8 @@ async fn assign_incoming( } *group_assignment = GroupAssignments { current: para_now }; + + Ok(()) } fn remove_outgoing( @@ -601,7 +586,7 @@ async fn fetch_collation( let (tx, rx) = oneshot::channel(); let PendingCollation { relay_parent, para_id, peer_id, prospective_candidate, .. } = pc; - let candidate_hash = prospective_candidate.candidate_hash(); + let candidate_hash = prospective_candidate.as_ref().map(ProspectiveCandidate::candidate_hash); if let Some(peer_data) = state.peer_data.get(&peer_id) { // If candidate hash is `Some` then relay parent supports prospective @@ -718,7 +703,7 @@ async fn request_collation( state: &mut State, relay_parent: Hash, para_id: ParaId, - prospective_candidate: ProspectiveCandidate, + prospective_candidate: Option, peer_id: PeerId, collator_id: CollatorId, result: oneshot::Sender<(CandidateReceipt, PoV)>, @@ -750,7 +735,7 @@ async fn request_collation( return } - let (requests, response_recv) = match (relay_parent_mode, prospective_candidate.0) { + let (requests, response_recv) = match (relay_parent_mode, prospective_candidate) { (ProspectiveParachainsMode::Disabled, None) => { let (req, response_recv) = OutgoingRequest::new( Recipient::Peer(peer_id), @@ -759,7 +744,7 @@ async fn request_collation( let requests = Requests::CollationFetchingV1(req); (requests, response_recv.boxed()) }, - (ProspectiveParachainsMode::Enabled, Some((candidate_hash, _))) => { + (ProspectiveParachainsMode::Enabled, Some(ProspectiveCandidate { candidate_hash, .. 
})) => { let (req, response_recv) = OutgoingRequest::new( Recipient::Peer(peer_id), request_vstaging::CollationFetchingRequest { @@ -1078,13 +1063,13 @@ async fn handle_advertisement( ?relay_parent, "Received advertise collation", ); + let prospective_candidate = + prospective_candidate.map(|(candidate_hash, parent_head_data_hash)| { + ProspectiveCandidate { candidate_hash, parent_head_data_hash } + }); - let pending_collation = PendingCollation::new( - relay_parent, - para_id, - peer_id, - ProspectiveCandidate(prospective_candidate), - ); + let pending_collation = + PendingCollation::new(relay_parent, para_id, peer_id, prospective_candidate); let collations = &mut per_relay_parent.collations; if !collations.is_fetch_allowed(relay_parent_mode, para_id) { @@ -1182,7 +1167,7 @@ where keystore, *leaf, ) - .await; + .await?; state.active_leaves.insert(*leaf, mode); state.per_relay_parent.insert(*leaf, per_relay_parent); @@ -1209,7 +1194,7 @@ where keystore, *block_hash, ) - .await; + .await?; entry.insert(per_relay_parent); } @@ -1637,8 +1622,8 @@ async fn handle_collation_fetched_result( if let Entry::Vacant(entry) = state.fetched_candidates.entry(fetched_collation) { collation_event.1.commitments_hash = Some(candidate_receipt.commitments_hash); - let result = match collation_event.1.prospective_candidate.0 { - Some((_, parent_head_data_hash)) => + let result = match collation_event.1.prospective_candidate { + Some(ProspectiveCandidate { parent_head_data_hash, .. 
}) => request_prospective_validation_data( ctx.sender(), relay_parent, From 6d410254a17cd4a426a0045aa1d35e8b07e96c77 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Fri, 12 Aug 2022 19:06:54 +0300 Subject: [PATCH 16/45] Validator side improvements --- .../src/collator_side/mod.rs | 5 ++- .../src/validator_side/collation.rs | 34 +++++++------------ .../src/validator_side/mod.rs | 4 +-- 3 files changed, 17 insertions(+), 26 deletions(-) diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index 282e2dc380ce..dc35d123b8c4 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -1076,8 +1076,8 @@ where let collations = state .per_relay_parent - .get_mut(removed) - .map(|per_relay_parent| std::mem::take(&mut per_relay_parent.collations)) + .remove(removed) + .map(|per_relay_parent| per_relay_parent.collations) .unwrap_or_default(); for collation in collations.into_values() { state.collation_result_senders.remove(&collation.receipt.hash()); @@ -1103,7 +1103,6 @@ where ), } } - state.per_relay_parent.remove(removed); state.span_per_relay_parent.remove(removed); state.waiting_collation_fetches.remove(removed); } diff --git a/node/network/collator-protocol/src/validator_side/collation.rs b/node/network/collator-protocol/src/validator_side/collation.rs index 7ae15b26b646..5c9f74abd86c 100644 --- a/node/network/collator-protocol/src/validator_side/collation.rs +++ b/node/network/collator-protocol/src/validator_side/collation.rs @@ -15,7 +15,7 @@ // along with Polkadot. If not, see . use futures::channel::oneshot; -use std::collections::{HashMap, VecDeque}; +use std::collections::VecDeque; use polkadot_node_network_protocol::PeerId; use polkadot_node_primitives::PoV; @@ -134,15 +134,14 @@ pub struct Collations { pub fetching_from: Option, /// Collation that were advertised to us, but we did not yet fetch. 
pub waiting_queue: VecDeque<(PendingCollation, CollatorId)>, - /// How many collations have been seconded per parachain. - /// Only used when async backing is enabled. - pub seconded_count: HashMap, + /// How many collations have been seconded. + pub seconded_count: usize, } impl Collations { /// Note a seconded collation for a given para. - pub(super) fn note_seconded(&mut self, para_id: ParaId) { - *self.seconded_count.entry(para_id).or_insert(0) += 1 + pub(super) fn note_seconded(&mut self) { + self.seconded_count += 1 } /// Returns the next collation to fetch from the `unfetched_collations`. @@ -172,31 +171,24 @@ impl Collations { match self.status { // We don't need to fetch any other collation when we already have seconded one. CollationStatus::Seconded => None, - CollationStatus::Waiting => { - while let Some(next) = self.waiting_queue.pop_front() { - let para_id = next.0.para_id; - if !self.is_fetch_allowed(relay_parent_mode, para_id) { - continue - } - - return Some(next) - } - - None - }, + CollationStatus::Waiting => + if !self.is_seconded_limit_reached(relay_parent_mode) { + None + } else { + self.waiting_queue.pop_front() + }, CollationStatus::WaitingOnValidation | CollationStatus::Fetching => unreachable!("We have reset the status above!"), } } /// Checks the limit of seconded candidates for a given para. 
- pub(super) fn is_fetch_allowed( + pub(super) fn is_seconded_limit_reached( &self, relay_parent_mode: ProspectiveParachainsMode, - para_id: ParaId, ) -> bool { let seconded_limit = if relay_parent_mode.is_enabled() { MAX_CANDIDATE_DEPTH + 1 } else { 1 }; - self.seconded_count.get(¶_id).map_or(true, |&num| num < seconded_limit) + self.seconded_count < seconded_limit } } diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index 63c78ac9a87e..5c54140be732 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -1072,7 +1072,7 @@ async fn handle_advertisement( PendingCollation::new(relay_parent, para_id, peer_id, prospective_candidate); let collations = &mut per_relay_parent.collations; - if !collations.is_fetch_allowed(relay_parent_mode, para_id) { + if !collations.is_seconded_limit_reached(relay_parent_mode) { gum::debug!( target: LOG_TARGET, peer_id = ?peer_id, @@ -1342,7 +1342,7 @@ async fn process_msg( if let Some(state) = state.per_relay_parent.get_mut(&parent) { state.collations.status = CollationStatus::Seconded; - state.collations.note_seconded(pending_collation.para_id); + state.collations.note_seconded(); } // If async backing is enabled, make an attempt to fetch next collation. 
dequeue_next_collation_and_fetch(ctx, state, parent, collator_id).await; From eeaa1ea2682e6b5e1a6be3416f1f5797e0d233fa Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Fri, 12 Aug 2022 19:58:57 +0300 Subject: [PATCH 17/45] Make old tests green --- .../src/validator_side/tests.rs | 89 ++++++++++++++----- 1 file changed, 66 insertions(+), 23 deletions(-) diff --git a/node/network/collator-protocol/src/validator_side/tests.rs b/node/network/collator-protocol/src/validator_side/tests.rs index 3c45a675aa48..3f5fc9952c72 100644 --- a/node/network/collator-protocol/src/validator_side/tests.rs +++ b/node/network/collator-protocol/src/validator_side/tests.rs @@ -42,6 +42,8 @@ use polkadot_primitives_test_helpers::{ const ACTIVITY_TIMEOUT: Duration = Duration::from_millis(500); const DECLARE_TIMEOUT: Duration = Duration::from_millis(25); +const API_VERSION_PROSPECTIVE_DISABLED: u32 = 2; + #[derive(Clone)] struct TestState { chain_ids: Vec, @@ -372,6 +374,19 @@ async fn advertise_collation( .await; } +async fn assert_runtime_version_request(virtual_overseer: &mut VirtualOverseer, hash: Hash) { + assert_matches!( + overseer_recv(virtual_overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + relay_parent, + RuntimeApiRequest::Version(tx) + )) => { + assert_eq!(relay_parent, hash); + tx.send(Ok(API_VERSION_PROSPECTIVE_DISABLED)).unwrap(); + } + ); +} + // As we receive a relevant advertisement act on it and issue a collation request. 
#[test] fn act_on_advertisement() { @@ -391,6 +406,7 @@ fn act_on_advertisement() { ) .await; + assert_runtime_version_request(&mut virtual_overseer, test_state.relay_parent).await; respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; let peer_b = PeerId::random(); @@ -432,6 +448,8 @@ fn collator_reporting_works() { ) .await; + assert_runtime_version_request(&mut virtual_overseer, test_state.relay_parent).await; + respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; let peer_b = PeerId::random(); @@ -540,16 +558,21 @@ fn fetch_collations_works() { let second = Hash::random(); + let our_view = our_view![test_state.relay_parent, second]; + overseer_send( &mut virtual_overseer, CollatorProtocolMessage::NetworkBridgeUpdate(NetworkBridgeEvent::OurViewChange( - our_view![test_state.relay_parent, second], + our_view.clone(), )), ) .await; - respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; - respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; + // Iter over view since the order may change due to sorted invariant. 
+ for hash in our_view.iter() { + assert_runtime_version_request(&mut virtual_overseer, *hash).await; + respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; + } let peer_b = PeerId::random(); let peer_c = PeerId::random(); @@ -591,9 +614,11 @@ fn fetch_collations_works() { candidate_a.descriptor.para_id = test_state.chain_ids[0]; candidate_a.descriptor.relay_parent = test_state.relay_parent; response_channel - .send(Ok( - CollationFetchingResponse::Collation(candidate_a.clone(), pov.clone()).encode() - )) + .send(Ok(request_v1::CollationFetchingResponse::Collation( + candidate_a.clone(), + pov.clone(), + ) + .encode())) .expect("Sending response should succeed"); assert_candidate_backing_second( @@ -675,15 +700,19 @@ fn fetch_collations_works() { // First request finishes now: response_channel_non_exclusive - .send(Ok( - CollationFetchingResponse::Collation(candidate_a.clone(), pov.clone()).encode() - )) + .send(Ok(request_v1::CollationFetchingResponse::Collation( + candidate_a.clone(), + pov.clone(), + ) + .encode())) .expect("Sending response should succeed"); response_channel - .send(Ok( - CollationFetchingResponse::Collation(candidate_a.clone(), pov.clone()).encode() - )) + .send(Ok(request_v1::CollationFetchingResponse::Collation( + candidate_a.clone(), + pov.clone(), + ) + .encode())) .expect("Sending response should succeed"); assert_candidate_backing_second( @@ -713,6 +742,7 @@ fn reject_connection_to_next_group() { ) .await; + assert_runtime_version_request(&mut virtual_overseer, test_state.relay_parent).await; respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; let peer_b = PeerId::random(); @@ -752,16 +782,20 @@ fn fetch_next_collation_on_invalid_collation() { let second = Hash::random(); + let our_view = our_view![test_state.relay_parent, second]; + overseer_send( &mut virtual_overseer, CollatorProtocolMessage::NetworkBridgeUpdate(NetworkBridgeEvent::OurViewChange( - our_view![test_state.relay_parent, second], + 
our_view.clone(), )), ) .await; - respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; - respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; + for hash in our_view.iter() { + assert_runtime_version_request(&mut virtual_overseer, *hash).await; + respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; + } let peer_b = PeerId::random(); let peer_c = PeerId::random(); @@ -798,9 +832,11 @@ fn fetch_next_collation_on_invalid_collation() { candidate_a.descriptor.para_id = test_state.chain_ids[0]; candidate_a.descriptor.relay_parent = test_state.relay_parent; response_channel - .send(Ok( - CollationFetchingResponse::Collation(candidate_a.clone(), pov.clone()).encode() - )) + .send(Ok(request_v1::CollationFetchingResponse::Collation( + candidate_a.clone(), + pov.clone(), + ) + .encode())) .expect("Sending response should succeed"); let receipt = assert_candidate_backing_second( @@ -860,6 +896,7 @@ fn inactive_disconnected() { ) .await; + assert_runtime_version_request(&mut virtual_overseer, hash_a).await; respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; let peer_b = PeerId::random(); @@ -900,18 +937,20 @@ fn activity_extends_life() { let hash_b = Hash::repeat_byte(1); let hash_c = Hash::repeat_byte(2); + let our_view = our_view![hash_a, hash_b, hash_c]; + overseer_send( &mut virtual_overseer, CollatorProtocolMessage::NetworkBridgeUpdate(NetworkBridgeEvent::OurViewChange( - our_view![hash_a, hash_b, hash_c], + our_view.clone(), )), ) .await; - // 3 heads, 3 times. 
- respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; - respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; - respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; + for hash in our_view.iter() { + assert_runtime_version_request(&mut virtual_overseer, *hash).await; + respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; + } let peer_b = PeerId::random(); @@ -967,6 +1006,7 @@ fn disconnect_if_no_declare() { ) .await; + assert_runtime_version_request(&mut virtual_overseer, test_state.relay_parent).await; respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; let peer_b = PeerId::random(); @@ -1005,6 +1045,7 @@ fn disconnect_if_wrong_declare() { ) .await; + assert_runtime_version_request(&mut virtual_overseer, test_state.relay_parent).await; respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; let peer_b = PeerId::random(); @@ -1067,6 +1108,7 @@ fn view_change_clears_old_collators() { ) .await; + assert_runtime_version_request(&mut virtual_overseer, test_state.relay_parent).await; respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; let peer_b = PeerId::random(); @@ -1090,6 +1132,7 @@ fn view_change_clears_old_collators() { .await; test_state.group_rotation_info = test_state.group_rotation_info.bump_rotation(); + assert_runtime_version_request(&mut virtual_overseer, hash_b).await; respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; assert_collator_disconnect(&mut virtual_overseer, peer_b.clone()).await; From 2732e269f379aa741a9d6fa1330c6ab352677fa2 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Wed, 24 Aug 2022 20:17:57 +0300 Subject: [PATCH 18/45] More fixes --- .../src/collator_side/collation.rs | 10 +- .../src/collator_side/mod.rs | 32 ++-- .../src/validator_side/mod.rs | 137 ++++++++++++------ 3 files changed, 117 insertions(+), 62 deletions(-) diff --git 
a/node/network/collator-protocol/src/collator_side/collation.rs b/node/network/collator-protocol/src/collator_side/collation.rs index 2b59f2aed000..196f6f5102df 100644 --- a/node/network/collator-protocol/src/collator_side/collation.rs +++ b/node/network/collator-protocol/src/collator_side/collation.rs @@ -135,5 +135,11 @@ impl VersionedCollationRequest { } } -pub type ActiveCollationFetches = - FuturesUnordered>; +pub struct CollationSendResult { + pub relay_parent: Hash, + pub candidate_hash: CandidateHash, + pub peer_id: PeerId, + pub timed_out: bool, +} + +pub type ActiveCollationFetches = FuturesUnordered>; diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index dc35d123b8c4..f1737f6a2142 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -62,8 +62,8 @@ mod metrics; mod tests; use collation::{ - ActiveCollationFetches, Collation, CollationStatus, VersionedCollationRequest, - WaitingCollationFetches, + ActiveCollationFetches, Collation, CollationSendResult, CollationStatus, + VersionedCollationRequest, WaitingCollationFetches, }; pub use metrics::Metrics; @@ -300,6 +300,7 @@ async fn distribute_collation( gum::debug!( target: LOG_TARGET, ?candidate_relay_parent, + ?relay_parent_mode, "The limit of {} collations per relay parent is already reached", collations_limit, ); @@ -702,15 +703,9 @@ async fn send_collation( state.active_collation_fetches.push( async move { let r = rx.timeout(MAX_UNSHARED_UPLOAD_TIME).await; - if r.is_none() { - gum::debug!( - target: LOG_TARGET, - ?relay_parent, - ?peer_id, - "Sending collation to validator timed out, carrying on with next validator." 
- ); - } - (relay_parent, candidate_hash, peer_id) + let timed_out = r.is_none(); + + CollationSendResult { relay_parent, candidate_hash, peer_id, timed_out } } .boxed(), ); @@ -1144,9 +1139,20 @@ pub(crate) async fn run( FromOrchestra::Signal(BlockFinalized(..)) => {} FromOrchestra::Signal(Conclude) => return Ok(()), }, - (relay_parent, candidate_hash, peer_id) = state.active_collation_fetches.select_next_some() => { + CollationSendResult { relay_parent, candidate_hash, peer_id, timed_out } = state.active_collation_fetches.select_next_some() => { let next = if let Some(waiting) = state.waiting_collation_fetches.get_mut(&relay_parent) { - waiting.waiting_peers.remove(&(peer_id, candidate_hash)); + if timed_out { + gum::debug!( + target: LOG_TARGET, + ?relay_parent, + ?peer_id, + ?candidate_hash, + "Sending collation to validator timed out, carrying on with next validator." + ); + waiting.waiting_peers.retain(|(waiting_peer_id, ..)| *waiting_peer_id != peer_id); + } else { + waiting.waiting_peers.remove(&(peer_id, candidate_hash)); + } if let Some(next) = waiting.waiting.pop_front() { next } else { diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index 5c54140be732..6a3aef4f6a8e 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -46,8 +46,9 @@ use polkadot_node_primitives::{PoV, SignedFullStatement, Statement}; use polkadot_node_subsystem::{ jaeger, messages::{ - CandidateBackingMessage, CollatorProtocolMessage, IfDisconnected, NetworkBridgeEvent, - NetworkBridgeMessage, ProspectiveParachainsMessage, ProspectiveValidationDataRequest, + CandidateBackingMessage, CollatorProtocolMessage, HypotheticalDepthRequest, IfDisconnected, + NetworkBridgeEvent, NetworkBridgeMessage, ProspectiveParachainsMessage, + ProspectiveValidationDataRequest, }, overseer, CollatorProtocolSenderTrait, FromOrchestra, 
OverseerSignal, PerLeafSpan, }; @@ -132,7 +133,11 @@ struct PerRequest { struct CollatingPeerState { collator_id: CollatorId, para_id: ParaId, - // Advertised relay parents. + // Collations advertised by peer per relay parent. + // + // V1 network protocol doesn't include candidate hash in + // advertisements, we store an empty set in this case to occupy + // a slot in map. advertisements: HashMap>, last_active: Instant, } @@ -294,7 +299,7 @@ impl PeerData { } } - /// Note that a peer is now collating with the given collator and para ids. + /// Note that a peer is now collating with the given collator and para id. /// /// This will overwrite any previous call to `set_collating` and should only be called /// if `is_collating` is false. @@ -443,7 +448,8 @@ struct State { /// another collator the chance to be faster (dequeue next fetch request as well). collation_fetch_timeouts: FuturesUnordered>, - /// Keep track of all pending candidate collations + /// Collations that we have successfully requested from peers and waiting + /// on validation. fetched_candidates: HashMap, } @@ -455,7 +461,10 @@ fn is_relay_parent_in_implicit_view( para_id: ParaId, ) -> bool { match relay_parent_mode { - ProspectiveParachainsMode::Disabled => true, + ProspectiveParachainsMode::Disabled => { + // The head is known and async backing is disabled => it is an active leaf. 
+ true + }, ProspectiveParachainsMode::Enabled => active_leaves.iter().any(|(hash, mode)| { mode.is_enabled() && implicit_view @@ -596,9 +605,7 @@ async fn fetch_collation( Delay::new(MAX_UNSHARED_DOWNLOAD_TIME).await; (collator_id, relay_parent) }; - state - .collation_fetch_timeouts - .push(timeout(id.clone(), relay_parent.clone()).boxed()); + state.collation_fetch_timeouts.push(timeout(id.clone(), relay_parent).boxed()); request_collation( sender, state, @@ -687,7 +694,7 @@ async fn handle_peer_view_change(state: &mut State, peer_id: PeerId, view: View) ); state .requested_collations - .retain(|pc, _| pc.peer_id != peer_id || !peer_data.has_advertised(&pc.relay_parent, None)); + .retain(|pc, _| pc.peer_id != peer_id || peer_data.has_advertised(&pc.relay_parent, None)); Ok(()) } @@ -844,7 +851,7 @@ async fn process_incoming_peer_message( target: LOG_TARGET, peer_id = ?origin, ?para_id, - "Peer is not in the collating state", + "Peer is already in the collating state", ); modify_reputation(ctx.sender(), origin, COST_UNEXPECTED_MESSAGE).await; return @@ -913,24 +920,41 @@ async fn process_incoming_peer_message( } } -// async fn request_hypothetical_depth( -// sender: &mut Sender, -// relay_parent: Hash, -// candidate_hash: CandidateHash, -// para_id: ParaId, -// ) -> Option> -// where -// Sender: CollatorProtocolSenderTrait, { -// let (tx, rx) = oneshot::channel(); - -// let request = HypotheticalDepthRequest { -// candidate_hash, -// candidate_para: todo!(), -// parent_head_data_hash: todo!(), -// candidate_relay_parent: todo!(), -// fragment_tree_relay_parent: todo!(), -// }; -// } +async fn is_seconding_allowed( + sender: &mut Sender, + relay_parent: Hash, + candidate_hash: CandidateHash, + parent_head_data_hash: Hash, + para_id: ParaId, + active_leaves: impl IntoIterator, +) -> Result +where + Sender: CollatorProtocolSenderTrait, +{ + for leaf in active_leaves { + let (tx, rx) = oneshot::channel(); + + let request = HypotheticalDepthRequest { + candidate_hash, 
+ candidate_para: para_id, + parent_head_data_hash, + candidate_relay_parent: relay_parent, + fragment_tree_relay_parent: leaf, + }; + + sender + .send_message(ProspectiveParachainsMessage::GetHypotheticalDepth(request, tx)) + .await; + + let response = rx.await.map_err(Error::CancelledGetHypotheticalDepth)?; + + if !response.is_empty() { + return Ok(true) + } + } + + Ok(false) +} async fn handle_advertisement( sender: &mut Sender, @@ -1023,24 +1047,33 @@ async fn handle_advertisement( return } - let insert_result = match (relay_parent_mode, prospective_candidate) { - (ProspectiveParachainsMode::Disabled, None) => peer_data.insert_advertisement( - relay_parent, - relay_parent_mode, - None, - &state.implicit_view, - &state.active_leaves, - ), + // TODO: only fetch a collation if it's built on top of backed nodes in fragment tree. + // https://github.com/paritytech/polkadot/issues/5923 + let is_seconding_allowed = match (relay_parent_mode, prospective_candidate) { + (ProspectiveParachainsMode::Disabled, None) => true, (ProspectiveParachainsMode::Enabled, Some((candidate_hash, parent_head_data_hash))) => { - // TODO [now]: request hypothetical depth and check for backed parent nodes - // in a fragment tree. 
- peer_data.insert_advertisement( + let active_leaves = state.active_leaves.keys().copied(); + is_seconding_allowed( + sender, relay_parent, - relay_parent_mode, - Some(candidate_hash), - &state.implicit_view, - &state.active_leaves, + candidate_hash, + parent_head_data_hash, + para_id, + active_leaves, ) + .await + .unwrap_or_else(|err| { + gum::warn!( + target: LOG_TARGET, + ?relay_parent, + ?para_id, + ?candidate_hash, + ?relay_parent_mode, + error = %err, + "Failed to query prospective parachains subsystem", + ); + false + }) }, _ => { gum::error!( @@ -1054,6 +1087,19 @@ async fn handle_advertisement( }, }; + if !is_seconding_allowed { + return + } + + let candidate_hash = prospective_candidate.map(|(hash, ..)| hash); + let insert_result = peer_data.insert_advertisement( + relay_parent, + relay_parent_mode, + candidate_hash, + &state.implicit_view, + &state.active_leaves, + ); + match insert_result { Ok((id, para_id)) => { gum::debug!( @@ -1576,9 +1622,6 @@ async fn handle_collation_fetched_result( state: &mut State, (mut collation_event, res): PendingCollationFetch, ) { - // If no prior collation for this relay parent has been seconded, then - // memorize the `collation_event` for that `relay_parent`, such that we may - // notify the collator of their successful second backing let relay_parent = collation_event.1.relay_parent; let para_id = collation_event.1.para_id; From 1435696e46c41fd32eaecaab6bf484b3ef5c84c5 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Mon, 5 Sep 2022 22:26:08 +0300 Subject: [PATCH 19/45] Collator side tests draft --- .../src/collator_side/mod.rs | 9 +- .../collator_side/{tests.rs => tests/mod.rs} | 229 +++++++++--- .../tests/prospective_parachains.rs | 326 ++++++++++++++++++ 3 files changed, 507 insertions(+), 57 deletions(-) rename node/network/collator-protocol/src/collator_side/{tests.rs => tests/mod.rs} (88%) create mode 100644 node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs diff --git 
a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index f1737f6a2142..7a26a62c5f17 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -841,6 +841,7 @@ async fn handle_incoming_request( target: LOG_TARGET, relay_parent = %relay_parent, mode = ?per_relay_parent.prospective_parachains_mode, + ?peer_id, "Collation request version is invalid", ); @@ -1149,10 +1150,13 @@ pub(crate) async fn run( ?candidate_hash, "Sending collation to validator timed out, carrying on with next validator." ); + // Drop all requests from slow peer. waiting.waiting_peers.retain(|(waiting_peer_id, ..)| *waiting_peer_id != peer_id); + waiting.waiting.retain(|req| req.peer_id() != peer_id); } else { waiting.waiting_peers.remove(&(peer_id, candidate_hash)); } + if let Some(next) = waiting.waiting.pop_front() { next } else { @@ -1177,7 +1181,10 @@ pub(crate) async fn run( (ProspectiveParachainsMode::Enabled, VersionedCollationRequest::VStaging(req)) => { per_relay_parent.collations.get(&req.payload.candidate_hash) }, - _ => continue, + _ => { + // Request version is checked in `handle_incoming_request`. 
+ continue + }, } }; diff --git a/node/network/collator-protocol/src/collator_side/tests.rs b/node/network/collator-protocol/src/collator_side/tests/mod.rs similarity index 88% rename from node/network/collator-protocol/src/collator_side/tests.rs rename to node/network/collator-protocol/src/collator_side/tests/mod.rs index 1da6966eb266..1f3944a10435 100644 --- a/node/network/collator-protocol/src/collator_side/tests.rs +++ b/node/network/collator-protocol/src/collator_side/tests/mod.rs @@ -43,11 +43,13 @@ use polkadot_node_subsystem::{ use polkadot_node_subsystem_test_helpers as test_helpers; use polkadot_node_subsystem_util::TimeoutExt; use polkadot_primitives::v2::{ - AuthorityDiscoveryId, CollatorPair, GroupRotationInfo, ScheduledCore, SessionIndex, + AuthorityDiscoveryId, CollatorPair, GroupIndex, GroupRotationInfo, ScheduledCore, SessionIndex, SessionInfo, ValidatorId, ValidatorIndex, }; use polkadot_primitives_test_helpers::TestCandidateBuilder; +mod prospective_parachains; + const API_VERSION_PROSPECTIVE_DISABLED: u32 = 2; #[derive(Clone)] @@ -89,7 +91,7 @@ impl Default for TestState { let validator_peer_id = std::iter::repeat_with(|| PeerId::random()).take(discovery_keys.len()).collect(); - let validator_groups = vec![vec![2, 0, 4], vec![3, 2, 4]] + let validator_groups = vec![vec![2, 0, 4], vec![1, 3]] .into_iter() .map(|g| g.into_iter().map(ValidatorIndex).collect()) .collect(); @@ -133,7 +135,8 @@ impl Default for TestState { impl TestState { fn current_group_validator_indices(&self) -> &[ValidatorIndex] { - &self.session_info.validator_groups[0] + let GroupIndex(group_idx) = self.group_rotation_info.group_for_core(CoreIndex(0), 2); + &self.session_info.validator_groups[group_idx as usize] } fn current_session_index(&self) -> SessionIndex { @@ -328,33 +331,22 @@ struct DistributeCollation { pov_block: PoV, } -/// Create some PoV and distribute it. 
-async fn distribute_collation( +async fn distribute_collation_with_receipt( virtual_overseer: &mut VirtualOverseer, test_state: &TestState, - // whether or not we expect a connection request or not. + relay_parent: Hash, + group_rotation_info: &GroupRotationInfo, should_connect: bool, + candidate: CandidateReceipt, + pov: PoV, + parent_head_data_hash: Hash, ) -> DistributeCollation { - // Now we want to distribute a `PoVBlock` - let pov_block = PoV { block_data: BlockData(vec![42, 43, 44]) }; - - let pov_hash = pov_block.hash(); - let parent_head_data_hash = Hash::zero(); - - let candidate = TestCandidateBuilder { - para_id: test_state.para_id, - relay_parent: test_state.relay_parent, - pov_hash, - ..Default::default() - } - .build(); - overseer_send( virtual_overseer, CollatorProtocolMessage::DistributeCollation( candidate.clone(), parent_head_data_hash, - pov_block.clone(), + pov.clone(), None, ), ) @@ -367,7 +359,7 @@ async fn distribute_collation( relay_parent, RuntimeApiRequest::AvailabilityCores(tx) )) => { - assert_eq!(relay_parent, test_state.relay_parent); + assert_eq!(relay_parent, relay_parent); tx.send(Ok(vec![test_state.availability_core.clone()])).unwrap(); } ); @@ -379,7 +371,7 @@ async fn distribute_collation( relay_parent, RuntimeApiRequest::SessionIndexForChild(tx), )) => { - assert_eq!(relay_parent, test_state.relay_parent); + assert_eq!(relay_parent, relay_parent); tx.send(Ok(test_state.current_session_index())).unwrap(); }, @@ -387,20 +379,20 @@ async fn distribute_collation( relay_parent, RuntimeApiRequest::SessionInfo(index, tx), )) => { - assert_eq!(relay_parent, test_state.relay_parent); + assert_eq!(relay_parent, relay_parent); assert_eq!(index, test_state.current_session_index()); tx.send(Ok(Some(test_state.session_info.clone()))).unwrap(); }, AllMessages::RuntimeApi(RuntimeApiMessage::Request( - relay_parent, + _relay_parent, RuntimeApiRequest::ValidatorGroups(tx), )) => { - assert_eq!(relay_parent, test_state.relay_parent); + 
assert_eq!(_relay_parent, relay_parent); tx.send(Ok(( test_state.session_info.validator_groups.clone(), - test_state.group_rotation_info.clone(), + group_rotation_info.clone(), ))) .unwrap(); // This call is mandatory - we are done: @@ -421,7 +413,43 @@ async fn distribute_collation( ); } - DistributeCollation { candidate, pov_block } + DistributeCollation { candidate, pov_block: pov } +} + +/// Create some PoV and distribute it. +async fn distribute_collation( + virtual_overseer: &mut VirtualOverseer, + test_state: &TestState, + relay_parent: Hash, + group_rotation_info: &GroupRotationInfo, + // whether or not we expect a connection request or not. + should_connect: bool, +) -> DistributeCollation { + // Now we want to distribute a `PoVBlock` + let pov_block = PoV { block_data: BlockData(vec![42, 43, 44]) }; + + let pov_hash = pov_block.hash(); + let parent_head_data_hash = Hash::zero(); + + let candidate = TestCandidateBuilder { + para_id: test_state.para_id, + relay_parent, + pov_hash, + ..Default::default() + } + .build(); + + distribute_collation_with_receipt( + virtual_overseer, + test_state, + relay_parent, + group_rotation_info, + should_connect, + candidate, + pov_block, + parent_head_data_hash, + ) + .await } /// Connect a peer @@ -495,28 +523,54 @@ async fn expect_declare_msg( } /// Check that the next received message is a collation advertisement message. +/// +/// Expects vstaging message if `expected_candidate_hash` is `Some`, v1 otherwise. 
async fn expect_advertise_collation_msg( virtual_overseer: &mut VirtualOverseer, peer: &PeerId, expected_relay_parent: Hash, + expected_candidate_hash: Option, ) { assert_matches!( overseer_recv(virtual_overseer).await, AllMessages::NetworkBridge( NetworkBridgeMessage::SendCollationMessage( to, - Versioned::V1(protocol_v1::CollationProtocol::CollatorProtocol(wire_message)), + wire_message, ) ) => { assert_eq!(to[0], *peer); - assert_matches!( - wire_message, - protocol_v1::CollatorProtocolMessage::AdvertiseCollation( - relay_parent, + match (expected_candidate_hash, wire_message) { + (None, Versioned::V1(protocol_v1::CollationProtocol::CollatorProtocol(wire_message))) => { + assert_matches!( + wire_message, + protocol_v1::CollatorProtocolMessage::AdvertiseCollation( + relay_parent, + ) => { + assert_eq!(relay_parent, expected_relay_parent); + } + ); + }, + ( + Some(expected_candidate_hash), + Versioned::VStaging(protocol_vstaging::CollationProtocol::CollatorProtocol( + wire_message, + )), ) => { - assert_eq!(relay_parent, expected_relay_parent); - } - ); + assert_matches!( + wire_message, + protocol_vstaging::CollatorProtocolMessage::AdvertiseCollation { + relay_parent, + candidate_hash, + .. 
+ } => { + assert_eq!(relay_parent, expected_relay_parent); + assert_eq!(candidate_hash, expected_candidate_hash); + } + ); + }, + _ => panic!("Invalid advertisement"), + } } ); } @@ -550,8 +604,14 @@ fn advertise_and_send_collation() { setup_system(&mut virtual_overseer, &test_state).await; - let DistributeCollation { candidate, pov_block } = - distribute_collation(&mut virtual_overseer, &test_state, true).await; + let DistributeCollation { candidate, pov_block } = distribute_collation( + &mut virtual_overseer, + &test_state, + test_state.relay_parent, + &test_state.group_rotation_info, + true, + ) + .await; for (val, peer) in test_state .current_group_validator_authority_ids() @@ -575,7 +635,8 @@ fn advertise_and_send_collation() { // The peer is interested in a leaf that we have a collation for; // advertise it. - expect_advertise_collation_msg(&mut virtual_overseer, &peer, test_state.relay_parent).await; + expect_advertise_collation_msg(&mut virtual_overseer, &peer, test_state.relay_parent, None) + .await; // Request a collation. let (pending_response, rx) = oneshot::channel(); @@ -668,7 +729,14 @@ fn advertise_and_send_collation() { assert!(overseer_recv_with_timeout(&mut virtual_overseer, TIMEOUT).await.is_none()); - distribute_collation(&mut virtual_overseer, &test_state, true).await; + distribute_collation( + &mut virtual_overseer, + &test_state, + test_state.relay_parent, + &test_state.group_rotation_info, + true, + ) + .await; // Send info about peer's view. overseer_send( @@ -680,7 +748,8 @@ fn advertise_and_send_collation() { ) .await; - expect_advertise_collation_msg(&mut virtual_overseer, &peer, test_state.relay_parent).await; + expect_advertise_collation_msg(&mut virtual_overseer, &peer, test_state.relay_parent, None) + .await; TestHarness { virtual_overseer, req_v1_cfg, req_vstaging_cfg } }); } @@ -756,15 +825,24 @@ fn collations_are_only_advertised_to_validators_with_correct_view() { // And let it tell us that it is has the same view. 
send_peer_view_change(virtual_overseer, &peer2, vec![test_state.relay_parent]).await; - distribute_collation(virtual_overseer, &test_state, true).await; + distribute_collation( + virtual_overseer, + &test_state, + test_state.relay_parent, + &test_state.group_rotation_info, + true, + ) + .await; - expect_advertise_collation_msg(virtual_overseer, &peer2, test_state.relay_parent).await; + expect_advertise_collation_msg(virtual_overseer, &peer2, test_state.relay_parent, None) + .await; // The other validator announces that it changed its view. send_peer_view_change(virtual_overseer, &peer, vec![test_state.relay_parent]).await; // After changing the view we should receive the advertisement - expect_advertise_collation_msg(virtual_overseer, &peer, test_state.relay_parent).await; + expect_advertise_collation_msg(virtual_overseer, &peer, test_state.relay_parent, None) + .await; test_harness }) } @@ -795,21 +873,36 @@ fn collate_on_two_different_relay_chain_blocks() { expect_declare_msg(virtual_overseer, &test_state, &peer).await; expect_declare_msg(virtual_overseer, &test_state, &peer2).await; - distribute_collation(virtual_overseer, &test_state, true).await; + distribute_collation( + virtual_overseer, + &test_state, + test_state.relay_parent, + &test_state.group_rotation_info, + true, + ) + .await; let old_relay_parent = test_state.relay_parent; // Advance to a new round, while informing the subsystem that the old and the new relay parent are active. 
test_state.advance_to_new_round(virtual_overseer, true).await; - distribute_collation(virtual_overseer, &test_state, true).await; + distribute_collation( + virtual_overseer, + &test_state, + test_state.relay_parent, + &test_state.group_rotation_info, + true, + ) + .await; send_peer_view_change(virtual_overseer, &peer, vec![old_relay_parent]).await; - expect_advertise_collation_msg(virtual_overseer, &peer, old_relay_parent).await; + expect_advertise_collation_msg(virtual_overseer, &peer, old_relay_parent, None).await; send_peer_view_change(virtual_overseer, &peer2, vec![test_state.relay_parent]).await; - expect_advertise_collation_msg(virtual_overseer, &peer2, test_state.relay_parent).await; + expect_advertise_collation_msg(virtual_overseer, &peer2, test_state.relay_parent, None) + .await; test_harness }) } @@ -832,10 +925,18 @@ fn validator_reconnect_does_not_advertise_a_second_time() { connect_peer(virtual_overseer, peer.clone(), Some(validator_id.clone())).await; expect_declare_msg(virtual_overseer, &test_state, &peer).await; - distribute_collation(virtual_overseer, &test_state, true).await; + distribute_collation( + virtual_overseer, + &test_state, + test_state.relay_parent, + &test_state.group_rotation_info, + true, + ) + .await; send_peer_view_change(virtual_overseer, &peer, vec![test_state.relay_parent]).await; - expect_advertise_collation_msg(virtual_overseer, &peer, test_state.relay_parent).await; + expect_advertise_collation_msg(virtual_overseer, &peer, test_state.relay_parent, None) + .await; // Disconnect and reconnect directly disconnect_peer(virtual_overseer, peer.clone()).await; @@ -916,8 +1017,14 @@ where setup_system(virtual_overseer, &test_state).await; - let DistributeCollation { candidate, pov_block } = - distribute_collation(virtual_overseer, &test_state, true).await; + let DistributeCollation { candidate, pov_block } = distribute_collation( + virtual_overseer, + &test_state, + test_state.relay_parent, + &test_state.group_rotation_info, + true, 
+ ) + .await; for (val, peer) in test_state .current_group_validator_authority_ids() @@ -943,10 +1050,20 @@ where // The peer is interested in a leaf that we have a collation for; // advertise it. - expect_advertise_collation_msg(virtual_overseer, &validator_0, test_state.relay_parent) - .await; - expect_advertise_collation_msg(virtual_overseer, &validator_1, test_state.relay_parent) - .await; + expect_advertise_collation_msg( + virtual_overseer, + &validator_0, + test_state.relay_parent, + None, + ) + .await; + expect_advertise_collation_msg( + virtual_overseer, + &validator_1, + test_state.relay_parent, + None, + ) + .await; // Request a collation. let (pending_response, rx) = oneshot::channel(); diff --git a/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs b/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs new file mode 100644 index 000000000000..782b8c604123 --- /dev/null +++ b/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs @@ -0,0 +1,326 @@ +// Copyright 2022 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . + +//! Tests for the collator side with enabled prospective parachains. 
+ +use super::*; + +use polkadot_node_subsystem::messages::{ChainApiMessage, ProspectiveParachainsMessage}; +use polkadot_primitives::v2::Header; + +const API_VERSION_PROSPECTIVE_ENABLED: u32 = 3; + +const ALLOWED_ANCESTRY: u32 = 3; + +fn get_parent_hash(hash: Hash) -> Hash { + Hash::from_low_u64_be(hash.to_low_u64_be() + 1) +} + +/// Handle a view update. +async fn update_view( + virtual_overseer: &mut VirtualOverseer, + test_state: &TestState, + new_view: Vec<(Hash, u32)>, // Hash and block number. + activated: u8, // How many new heads does this update contain? +) { + let new_view: HashMap = HashMap::from_iter(new_view); + + let our_view = + OurView::new(new_view.keys().map(|hash| (*hash, Arc::new(jaeger::Span::Disabled))), 0); + + overseer_send( + virtual_overseer, + CollatorProtocolMessage::NetworkBridgeUpdate(NetworkBridgeEvent::OurViewChange(our_view)), + ) + .await; + + let mut next_overseer_message = None; + for _ in 0..activated { + let (leaf_hash, leaf_number) = assert_matches!( + overseer_recv(virtual_overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + parent, + RuntimeApiRequest::Version(tx), + )) => { + tx.send(Ok(API_VERSION_PROSPECTIVE_ENABLED)).unwrap(); + (parent, new_view.get(&parent).copied().expect("Unknown parent requested")) + } + ); + + let min_number = leaf_number.saturating_sub(ALLOWED_ANCESTRY); + + assert_matches!( + overseer_recv(virtual_overseer).await, + AllMessages::ProspectiveParachains( + ProspectiveParachainsMessage::GetMinimumRelayParents(parent, tx), + ) if parent == leaf_hash => { + tx.send(vec![(test_state.para_id, min_number)]).unwrap(); + } + ); + + let ancestry_len = leaf_number + 1 - min_number; + let ancestry_hashes = std::iter::successors(Some(leaf_hash), |h| Some(get_parent_hash(*h))) + .take(ancestry_len as usize); + let ancestry_numbers = (min_number..=leaf_number).rev(); + let mut ancestry_iter = ancestry_hashes.clone().zip(ancestry_numbers).peekable(); + + loop { + let (hash, number) = match 
ancestry_iter.next() { + Some((hash, number)) => (hash, number), + None => break, + }; + + // May be `None` for the last element. + let parent_hash = + ancestry_iter.peek().map(|(h, _)| *h).unwrap_or_else(|| get_parent_hash(hash)); + + let msg = match next_overseer_message.take() { + Some(msg) => Some(msg), + None => + overseer_recv_with_timeout(virtual_overseer, Duration::from_millis(50)).await, + }; + + let msg = match msg { + Some(msg) => msg, + None => { + // We're done. + return + }, + }; + + if !matches!( + &msg, + AllMessages::ChainApi(ChainApiMessage::BlockHeader(_hash, ..)) + if *_hash == hash + ) { + // Ancestry has already been cached for this leaf. + next_overseer_message.replace(msg); + break + } + + assert_matches!( + msg, + AllMessages::ChainApi(ChainApiMessage::BlockHeader(.., tx)) => { + let header = Header { + parent_hash, + number, + state_root: Hash::zero(), + extrinsics_root: Hash::zero(), + digest: Default::default(), + }; + + tx.send(Ok(Some(header))).unwrap(); + } + ); + } + } +} + +/// Test that a collator distributes a collation from the allowed ancestry +/// to correct validators group. +#[test] +fn distribute_collation_from_implicit_view() { + let head_c = Hash::from_low_u64_be(126); + let head_c_num: u32 = 66; + let head_a = Hash::from_low_u64_be(128); + let head_a_num: u32 = 64; + + // Grandparent of head `a`. + let head_b = Hash::from_low_u64_be(130); + let head_b_num = 62; + + let group_rotation_info = GroupRotationInfo { + session_start_block: head_b_num - 2, + group_rotation_frequency: 3, + now: head_b_num, + }; + + let mut test_state = TestState::default(); + test_state.group_rotation_info = group_rotation_info; + + let local_peer_id = test_state.local_peer_id; + let collator_pair = test_state.collator_pair.clone(); + + test_harness(local_peer_id, collator_pair, |mut test_harness| async move { + let virtual_overseer = &mut test_harness.virtual_overseer; + + // Set collating para id. 
+ overseer_send(virtual_overseer, CollatorProtocolMessage::CollateOn(test_state.para_id)) + .await; + // Activated leaf is `a`, but the collation will be based on `b`. + update_view(virtual_overseer, &test_state, vec![(head_a, head_a_num)], 1).await; + + let validator_peer_ids = test_state.current_group_validator_peer_ids(); + for (val, peer) in test_state + .current_group_validator_authority_ids() + .into_iter() + .zip(validator_peer_ids.clone()) + { + connect_peer(virtual_overseer, peer.clone(), Some(val.clone())).await; + } + + // Collator declared itself to each peer. + for peer_id in &validator_peer_ids { + expect_declare_msg(virtual_overseer, &test_state, peer_id).await; + } + + let pov = PoV { block_data: BlockData(vec![1, 2, 3]) }; + let parent_head_data_hash = Hash::repeat_byte(0xAA); + let candidate = TestCandidateBuilder { + para_id: test_state.para_id, + relay_parent: head_b, + pov_hash: pov.hash(), + ..Default::default() + } + .build(); + let DistributeCollation { candidate, pov_block: _ } = distribute_collation_with_receipt( + virtual_overseer, + &test_state, + head_b, + &test_state.group_rotation_info, + false, // Check the group manually. + candidate, + pov, + parent_head_data_hash, + ) + .await; + assert_matches!( + overseer_recv(virtual_overseer).await, + AllMessages::NetworkBridge( + NetworkBridgeMessage::ConnectToValidators { validator_ids, .. } + ) => { + let expected_validators = test_state.current_group_validator_authority_ids(); + + assert_eq!(expected_validators, validator_ids); + } + ); + + let candidate_hash = Some(candidate.hash()); + + // Update peer views. + for peed_id in &validator_peer_ids { + send_peer_view_change(virtual_overseer, peed_id, vec![head_a]).await; + expect_advertise_collation_msg(virtual_overseer, peed_id, head_b, candidate_hash).await; + } + + // Head `b` goes out of view. + // Build a different candidate for this relay parent and attempt to distribute it. 
+ update_view(virtual_overseer, &test_state, vec![(head_c, head_c_num)], 1).await; + + let pov = PoV { block_data: BlockData(vec![4, 5, 6]) }; + let parent_head_data_hash = Hash::repeat_byte(0xBB); + let candidate = TestCandidateBuilder { + para_id: test_state.para_id, + relay_parent: head_b, + pov_hash: pov.hash(), + ..Default::default() + } + .build(); + overseer_send( + virtual_overseer, + CollatorProtocolMessage::DistributeCollation( + candidate.clone(), + parent_head_data_hash, + pov.clone(), + None, + ), + ) + .await; + + // Parent out of view, nothing happens. + assert!(overseer_recv_with_timeout(virtual_overseer, Duration::from_millis(100)) + .await + .is_none()); + + test_harness + }) +} + +/// Tests that collator can distribute up to `MAX_CANDIDATE_DEPTH + 1` candidates +/// per relay parent. +#[test] +fn distribute_collation_up_to_limit() { + let test_state = TestState::default(); + + let local_peer_id = test_state.local_peer_id; + let collator_pair = test_state.collator_pair.clone(); + + test_harness(local_peer_id, collator_pair, |mut test_harness| async move { + let virtual_overseer = &mut test_harness.virtual_overseer; + + let head_a = Hash::from_low_u64_be(128); + let head_a_num: u32 = 64; + + // Grandparent of head `a`. + let head_b = Hash::from_low_u64_be(130); + + // Set collating para id. + overseer_send(virtual_overseer, CollatorProtocolMessage::CollateOn(test_state.para_id)) + .await; + // Activated leaf is `a`, but the collation will be based on `b`. 
+ update_view(virtual_overseer, &test_state, vec![(head_a, head_a_num)], 1).await; + + for i in 0..(MAX_CANDIDATE_DEPTH + 1) { + let pov = PoV { block_data: BlockData(vec![i as u8]) }; + let parent_head_data_hash = Hash::repeat_byte(0xAA); + let candidate = TestCandidateBuilder { + para_id: test_state.para_id, + relay_parent: head_b, + pov_hash: pov.hash(), + ..Default::default() + } + .build(); + distribute_collation_with_receipt( + virtual_overseer, + &test_state, + head_b, + &test_state.group_rotation_info, + true, + candidate, + pov, + parent_head_data_hash, + ) + .await; + } + + let pov = PoV { block_data: BlockData(vec![10, 12, 6]) }; + let parent_head_data_hash = Hash::repeat_byte(0xBB); + let candidate = TestCandidateBuilder { + para_id: test_state.para_id, + relay_parent: head_b, + pov_hash: pov.hash(), + ..Default::default() + } + .build(); + overseer_send( + virtual_overseer, + CollatorProtocolMessage::DistributeCollation( + candidate.clone(), + parent_head_data_hash, + pov.clone(), + None, + ), + ) + .await; + + // Limit has been reached. 
+ assert!(overseer_recv_with_timeout(virtual_overseer, Duration::from_millis(100)) + .await + .is_none()); + + test_harness + }) +} From 1997a84c77dc928449d453c056e66f0853cf8357 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Tue, 6 Sep 2022 19:14:10 +0300 Subject: [PATCH 20/45] Send collation test --- .../src/collator_side/tests/mod.rs | 93 ++++++----- .../tests/prospective_parachains.rs | 149 +++++++++++++++--- 2 files changed, 182 insertions(+), 60 deletions(-) diff --git a/node/network/collator-protocol/src/collator_side/tests/mod.rs b/node/network/collator-protocol/src/collator_side/tests/mod.rs index 1f3944a10435..09b8e335ce83 100644 --- a/node/network/collator-protocol/src/collator_side/tests/mod.rs +++ b/node/network/collator-protocol/src/collator_side/tests/mod.rs @@ -529,50 +529,59 @@ async fn expect_advertise_collation_msg( virtual_overseer: &mut VirtualOverseer, peer: &PeerId, expected_relay_parent: Hash, - expected_candidate_hash: Option, + expected_candidate_hashes: Option>, ) { - assert_matches!( - overseer_recv(virtual_overseer).await, - AllMessages::NetworkBridge( - NetworkBridgeMessage::SendCollationMessage( - to, - wire_message, - ) - ) => { - assert_eq!(to[0], *peer); - match (expected_candidate_hash, wire_message) { - (None, Versioned::V1(protocol_v1::CollationProtocol::CollatorProtocol(wire_message))) => { - assert_matches!( - wire_message, - protocol_v1::CollatorProtocolMessage::AdvertiseCollation( - relay_parent, - ) => { - assert_eq!(relay_parent, expected_relay_parent); - } - ); - }, - ( - Some(expected_candidate_hash), - Versioned::VStaging(protocol_vstaging::CollationProtocol::CollatorProtocol( - wire_message, - )), - ) => { - assert_matches!( - wire_message, - protocol_vstaging::CollatorProtocolMessage::AdvertiseCollation { - relay_parent, - candidate_hash, - .. 
- } => { - assert_eq!(relay_parent, expected_relay_parent); - assert_eq!(candidate_hash, expected_candidate_hash); - } - ); - }, - _ => panic!("Invalid advertisement"), + let mut candidate_hashes: Option> = + expected_candidate_hashes.map(|hashes| hashes.into_iter().collect()); + let iter_num = candidate_hashes.as_ref().map(|hashes| hashes.len()).unwrap_or(1); + + for _ in 0..iter_num { + assert_matches!( + overseer_recv(virtual_overseer).await, + AllMessages::NetworkBridge( + NetworkBridgeMessage::SendCollationMessage( + to, + wire_message, + ) + ) => { + assert_eq!(to[0], *peer); + match (candidate_hashes.as_mut(), wire_message) { + (None, Versioned::V1(protocol_v1::CollationProtocol::CollatorProtocol(wire_message))) => { + assert_matches!( + wire_message, + protocol_v1::CollatorProtocolMessage::AdvertiseCollation( + relay_parent, + ) => { + assert_eq!(relay_parent, expected_relay_parent); + } + ); + }, + ( + Some(candidate_hashes), + Versioned::VStaging(protocol_vstaging::CollationProtocol::CollatorProtocol( + wire_message, + )), + ) => { + assert_matches!( + wire_message, + protocol_vstaging::CollatorProtocolMessage::AdvertiseCollation { + relay_parent, + candidate_hash, + .. + } => { + assert_eq!(relay_parent, expected_relay_parent); + assert!(candidate_hashes.contains(&candidate_hash)); + + // Drop the hash we've already seen. + candidate_hashes.remove(&candidate_hash); + } + ); + }, + _ => panic!("Invalid advertisement"), + } } - } - ); + ); + } } /// Send a message that the given peer's view changed. 
diff --git a/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs b/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs index 782b8c604123..b2e9f9974b77 100644 --- a/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs +++ b/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs @@ -133,19 +133,21 @@ async fn update_view( /// to correct validators group. #[test] fn distribute_collation_from_implicit_view() { - let head_c = Hash::from_low_u64_be(126); - let head_c_num: u32 = 66; - let head_a = Hash::from_low_u64_be(128); - let head_a_num: u32 = 64; + let head_a = Hash::from_low_u64_be(126); + let head_a_num: u32 = 66; // Grandparent of head `a`. - let head_b = Hash::from_low_u64_be(130); - let head_b_num = 62; + let head_b = Hash::from_low_u64_be(128); + let head_b_num: u32 = 64; + + // Grandparent of head `b`. + let head_c = Hash::from_low_u64_be(130); + let head_c_num = 62; let group_rotation_info = GroupRotationInfo { - session_start_block: head_b_num - 2, + session_start_block: head_c_num - 2, group_rotation_frequency: 3, - now: head_b_num, + now: head_c_num, }; let mut test_state = TestState::default(); @@ -160,8 +162,8 @@ fn distribute_collation_from_implicit_view() { // Set collating para id. overseer_send(virtual_overseer, CollatorProtocolMessage::CollateOn(test_state.para_id)) .await; - // Activated leaf is `a`, but the collation will be based on `b`. - update_view(virtual_overseer, &test_state, vec![(head_a, head_a_num)], 1).await; + // Activated leaf is `b`, but the collation will be based on `c`. 
+ update_view(virtual_overseer, &test_state, vec![(head_b, head_b_num)], 1).await; let validator_peer_ids = test_state.current_group_validator_peer_ids(); for (val, peer) in test_state @@ -181,7 +183,7 @@ fn distribute_collation_from_implicit_view() { let parent_head_data_hash = Hash::repeat_byte(0xAA); let candidate = TestCandidateBuilder { para_id: test_state.para_id, - relay_parent: head_b, + relay_parent: head_c, pov_hash: pov.hash(), ..Default::default() } @@ -189,7 +191,7 @@ fn distribute_collation_from_implicit_view() { let DistributeCollation { candidate, pov_block: _ } = distribute_collation_with_receipt( virtual_overseer, &test_state, - head_b, + head_c, &test_state.group_rotation_info, false, // Check the group manually. candidate, @@ -208,23 +210,29 @@ fn distribute_collation_from_implicit_view() { } ); - let candidate_hash = Some(candidate.hash()); + let candidate_hash = candidate.hash(); // Update peer views. for peed_id in &validator_peer_ids { - send_peer_view_change(virtual_overseer, peed_id, vec![head_a]).await; - expect_advertise_collation_msg(virtual_overseer, peed_id, head_b, candidate_hash).await; + send_peer_view_change(virtual_overseer, peed_id, vec![head_b]).await; + expect_advertise_collation_msg( + virtual_overseer, + peed_id, + head_c, + Some(vec![candidate_hash]), + ) + .await; } - // Head `b` goes out of view. + // Head `c` goes out of view. // Build a different candidate for this relay parent and attempt to distribute it. 
- update_view(virtual_overseer, &test_state, vec![(head_c, head_c_num)], 1).await; + update_view(virtual_overseer, &test_state, vec![(head_a, head_a_num)], 1).await; let pov = PoV { block_data: BlockData(vec![4, 5, 6]) }; let parent_head_data_hash = Hash::repeat_byte(0xBB); let candidate = TestCandidateBuilder { para_id: test_state.para_id, - relay_parent: head_b, + relay_parent: head_c, pov_hash: pov.hash(), ..Default::default() } @@ -324,3 +332,108 @@ fn distribute_collation_up_to_limit() { test_harness }) } + +/// Tests that collator correctly handles peer V2 requests. +#[test] +fn advertise_and_send_collation_by_hash() { + let test_state = TestState::default(); + + let local_peer_id = test_state.local_peer_id; + let collator_pair = test_state.collator_pair.clone(); + + test_harness(local_peer_id, collator_pair, |test_harness| async move { + let mut virtual_overseer = test_harness.virtual_overseer; + let req_v1_cfg = test_harness.req_v1_cfg; + let mut req_vstaging_cfg = test_harness.req_vstaging_cfg; + + let head_a = Hash::from_low_u64_be(128); + let head_a_num: u32 = 64; + + // Parent of head `a`. + let head_b = Hash::from_low_u64_be(129); + let head_b_num: u32 = 63; + + // Set collating para id. 
+ overseer_send( + &mut virtual_overseer, + CollatorProtocolMessage::CollateOn(test_state.para_id), + ) + .await; + update_view(&mut virtual_overseer, &test_state, vec![(head_b, head_b_num)], 1).await; + update_view(&mut virtual_overseer, &test_state, vec![(head_a, head_a_num)], 1).await; + + let candidates: Vec<_> = (0..2) + .map(|i| { + let pov = PoV { block_data: BlockData(vec![i as u8]) }; + let candidate = TestCandidateBuilder { + para_id: test_state.para_id, + relay_parent: head_b, + pov_hash: pov.hash(), + ..Default::default() + } + .build(); + (candidate, pov) + }) + .collect(); + for (candidate, pov) in &candidates { + distribute_collation_with_receipt( + &mut virtual_overseer, + &test_state, + head_b, + &test_state.group_rotation_info, + true, + candidate.clone(), + pov.clone(), + Hash::zero(), + ) + .await; + } + + let peer = test_state.validator_peer_id[0].clone(); + let validator_id = test_state.current_group_validator_authority_ids()[0].clone(); + connect_peer(&mut virtual_overseer, peer.clone(), Some(validator_id.clone())).await; + expect_declare_msg(&mut virtual_overseer, &test_state, &peer).await; + + // Head `b` is not a leaf, but both advertisements are still relevant. + send_peer_view_change(&mut virtual_overseer, &peer, vec![head_b]).await; + let hashes: Vec<_> = candidates.iter().map(|(candidate, _)| candidate.hash()).collect(); + expect_advertise_collation_msg(&mut virtual_overseer, &peer, head_b, Some(hashes)).await; + + for (candidate, pov_block) in candidates { + let (pending_response, rx) = oneshot::channel(); + req_vstaging_cfg + .inbound_queue + .as_mut() + .unwrap() + .send(RawIncomingRequest { + peer, + payload: request_vstaging::CollationFetchingRequest { + relay_parent: head_b, + para_id: test_state.para_id, + candidate_hash: candidate.hash(), + } + .encode(), + pending_response, + }) + .await + .unwrap(); + + assert_matches!( + rx.await, + Ok(full_response) => { + // Response is the same for vstaging. 
+ let request_v1::CollationFetchingResponse::Collation(receipt, pov): request_v1::CollationFetchingResponse + = request_v1::CollationFetchingResponse::decode( + &mut full_response.result + .expect("We should have a proper answer").as_ref() + ) + .expect("Decoding should work"); + assert_eq!(receipt, candidate); + assert_eq!(pov, pov_block); + } + ); + } + + TestHarness { virtual_overseer, req_v1_cfg, req_vstaging_cfg } + }) +} From 9c7481443061fbef75f4186d22944a86519fcced Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Tue, 13 Sep 2022 17:30:27 +0300 Subject: [PATCH 21/45] fmt --- .../src/collator_side/tests/mod.rs | 7 ++++++- .../protocol/src/request_response/mod.rs | 19 ++++++++++--------- node/service/src/lib.rs | 6 ++++-- node/service/src/overseer.rs | 4 +++- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/node/network/collator-protocol/src/collator_side/tests/mod.rs b/node/network/collator-protocol/src/collator_side/tests/mod.rs index 255e14a0e361..b79769320079 100644 --- a/node/network/collator-protocol/src/collator_side/tests/mod.rs +++ b/node/network/collator-protocol/src/collator_side/tests/mod.rs @@ -29,7 +29,12 @@ use sp_core::crypto::Pair; use sp_keyring::Sr25519Keyring; use sp_runtime::traits::AppVerify; -use polkadot_node_network_protocol::{our_view, request_response::{IncomingRequest, ReqProtocolNames}, view, peer_set::CollationVersion}; +use polkadot_node_network_protocol::{ + our_view, + peer_set::CollationVersion, + request_response::{IncomingRequest, ReqProtocolNames}, + view, +}; use polkadot_node_primitives::BlockData; use polkadot_node_subsystem::{ jaeger, diff --git a/node/network/protocol/src/request_response/mod.rs b/node/network/protocol/src/request_response/mod.rs index 20d9c44ff8c4..ce0fd05bdc31 100644 --- a/node/network/protocol/src/request_response/mod.rs +++ b/node/network/protocol/src/request_response/mod.rs @@ -148,15 +148,16 @@ impl Protocol { request_timeout: CHUNK_REQUEST_TIMEOUT, inbound_queue: Some(tx), }, - 
Protocol::CollationFetchingV1 | Protocol::CollationFetchingVStaging => RequestResponseConfig { - name, - fallback_names, - max_request_size: 1_000, - max_response_size: POV_RESPONSE_SIZE, - // Taken from initial implementation in collator protocol: - request_timeout: POV_REQUEST_TIMEOUT_CONNECTED, - inbound_queue: Some(tx), - }, + Protocol::CollationFetchingV1 | Protocol::CollationFetchingVStaging => + RequestResponseConfig { + name, + fallback_names, + max_request_size: 1_000, + max_response_size: POV_RESPONSE_SIZE, + // Taken from initial implementation in collator protocol: + request_timeout: POV_REQUEST_TIMEOUT_CONNECTED, + inbound_queue: Some(tx), + }, Protocol::PoVFetchingV1 => RequestResponseConfig { name, fallback_names, diff --git a/node/service/src/lib.rs b/node/service/src/lib.rs index e3ebc8330361..5b19a6c0f67a 100644 --- a/node/service/src/lib.rs +++ b/node/service/src/lib.rs @@ -872,9 +872,11 @@ where config.network.request_response_protocols.push(cfg); let (chunk_req_receiver, cfg) = IncomingRequest::get_config_receiver(&req_protocol_names); config.network.request_response_protocols.push(cfg); - let (collation_req_v1_receiver, cfg) = IncomingRequest::get_config_receiver(&req_protocol_names); + let (collation_req_v1_receiver, cfg) = + IncomingRequest::get_config_receiver(&req_protocol_names); config.network.request_response_protocols.push(cfg); - let (collation_req_vstaging_receiver, cfg) = IncomingRequest::get_config_receiver(&req_protocol_names); + let (collation_req_vstaging_receiver, cfg) = + IncomingRequest::get_config_receiver(&req_protocol_names); config.network.request_response_protocols.push(cfg); let (available_data_req_receiver, cfg) = IncomingRequest::get_config_receiver(&req_protocol_names); diff --git a/node/service/src/overseer.rs b/node/service/src/overseer.rs index 6d6574bbfe47..3f369531ffc8 100644 --- a/node/service/src/overseer.rs +++ b/node/service/src/overseer.rs @@ -26,7 +26,9 @@ use polkadot_node_core_chain_selection::Config as 
ChainSelectionConfig; use polkadot_node_core_dispute_coordinator::Config as DisputeCoordinatorConfig; use polkadot_node_network_protocol::{ peer_set::PeerSetProtocolNames, - request_response::{v1 as request_v1, IncomingRequestReceiver, ReqProtocolNames, vstaging as request_vstaging}, + request_response::{ + v1 as request_v1, vstaging as request_vstaging, IncomingRequestReceiver, ReqProtocolNames, + }, }; #[cfg(any(feature = "malus", test))] pub use polkadot_overseer::{ From cf09d403e259a00e44ce6c20e4860e5124687587 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Tue, 13 Sep 2022 20:40:10 +0300 Subject: [PATCH 22/45] Collator side network protocol versioning --- Cargo.lock | 36 ++-- node/network/collator-protocol/Cargo.toml | 2 +- .../src/collator_side/collation.rs | 2 +- .../src/collator_side/metrics.rs | 2 +- .../src/collator_side/mod.rs | 167 ++++++++++++------ .../src/collator_side/tests/mod.rs | 133 ++++++++++++-- .../tests/prospective_parachains.rs | 54 +++++- 7 files changed, 309 insertions(+), 87 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f01752165b83..9f45639424c4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -553,9 +553,9 @@ dependencies = [ [[package]] name = "bitvec" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1489fcb93a5bb47da0462ca93ad252ad6af2145cce58d10d46a83931ba9f016b" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" dependencies = [ "funty 2.0.0", "radium 0.7.0", @@ -3220,7 +3220,7 @@ name = "kusama-runtime" version = "0.9.28" dependencies = [ "beefy-primitives", - "bitvec 1.0.0", + "bitvec 1.0.1", "frame-benchmarking", "frame-election-provider-support", "frame-executive", @@ -5755,7 +5755,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9182e4a71cae089267ab03e67c99368db7cd877baf50f931e5d6d4b71e195ac0" dependencies = [ "arrayvec 0.7.2", - "bitvec 1.0.0", + "bitvec 1.0.1", "byte-slice-cast", "bytes", 
"impl-trait-for-tuples", @@ -6062,7 +6062,7 @@ version = "0.9.28" dependencies = [ "always-assert", "assert_matches", - "bitvec 1.0.0", + "bitvec 1.0.1", "env_logger 0.9.0", "futures", "log", @@ -6213,7 +6213,7 @@ version = "0.9.28" dependencies = [ "always-assert", "assert_matches", - "bitvec 1.0.0", + "bitvec 1.0.1", "env_logger 0.9.0", "fatality", "futures", @@ -6373,7 +6373,7 @@ version = "0.9.28" dependencies = [ "assert_matches", "async-trait", - "bitvec 1.0.0", + "bitvec 1.0.1", "derive_more", "futures", "futures-timer", @@ -6411,7 +6411,7 @@ name = "polkadot-node-core-av-store" version = "0.9.28" dependencies = [ "assert_matches", - "bitvec 1.0.0", + "bitvec 1.0.1", "env_logger 0.9.0", "futures", "futures-timer", @@ -6439,7 +6439,7 @@ name = "polkadot-node-core-backing" version = "0.9.28" dependencies = [ "assert_matches", - "bitvec 1.0.0", + "bitvec 1.0.1", "fatality", "futures", "polkadot-erasure-coding", @@ -6586,7 +6586,7 @@ name = "polkadot-node-core-prospective-parachains" version = "0.9.16" dependencies = [ "assert_matches", - "bitvec 1.0.0", + "bitvec 1.0.1", "fatality", "futures", "parity-scale-codec 2.3.1", @@ -6603,7 +6603,7 @@ dependencies = [ name = "polkadot-node-core-provisioner" version = "0.9.28" dependencies = [ - "bitvec 1.0.0", + "bitvec 1.0.1", "fatality", "futures", "futures-timer", @@ -6937,7 +6937,7 @@ dependencies = [ name = "polkadot-primitives" version = "0.9.28" dependencies = [ - "bitvec 1.0.0", + "bitvec 1.0.1", "frame-system", "hex-literal", "parity-scale-codec 3.1.5", @@ -7009,7 +7009,7 @@ name = "polkadot-runtime" version = "0.9.28" dependencies = [ "beefy-primitives", - "bitvec 1.0.0", + "bitvec 1.0.1", "frame-benchmarking", "frame-election-provider-support", "frame-executive", @@ -7104,7 +7104,7 @@ name = "polkadot-runtime-common" version = "0.9.28" dependencies = [ "beefy-primitives", - "bitvec 1.0.0", + "bitvec 1.0.1", "frame-benchmarking", "frame-election-provider-support", "frame-support", @@ -7178,7 +7178,7 @@ version 
= "0.9.28" dependencies = [ "assert_matches", "bitflags", - "bitvec 1.0.0", + "bitvec 1.0.1", "derive_more", "frame-benchmarking", "frame-support", @@ -7434,7 +7434,7 @@ name = "polkadot-test-runtime" version = "0.9.28" dependencies = [ "beefy-primitives", - "bitvec 1.0.0", + "bitvec 1.0.1", "frame-election-provider-support", "frame-executive", "frame-support", @@ -9501,7 +9501,7 @@ version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c46be926081c9f4dd5dd9b6f1d3e3229f2360bc6502dd8836f84a93b7c75e99a" dependencies = [ - "bitvec 1.0.0", + "bitvec 1.0.1", "cfg-if 1.0.0", "derive_more", "parity-scale-codec 3.1.5", @@ -12287,7 +12287,7 @@ name = "westend-runtime" version = "0.9.28" dependencies = [ "beefy-primitives", - "bitvec 1.0.0", + "bitvec 1.0.1", "frame-benchmarking", "frame-election-provider-support", "frame-executive", diff --git a/node/network/collator-protocol/Cargo.toml b/node/network/collator-protocol/Cargo.toml index b0baa5a67d14..0a6d0589eaa6 100644 --- a/node/network/collator-protocol/Cargo.toml +++ b/node/network/collator-protocol/Cargo.toml @@ -9,7 +9,7 @@ always-assert = "0.1.2" futures = "0.3.21" futures-timer = "3" gum = { package = "tracing-gum", path = "../../gum" } -bitvec = { version = "1.0.0", default-features = false, features = ["alloc"] } +bitvec = { version = "1.0.1", default-features = false, features = ["alloc"] } sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" } sp-runtime = { git = "https://github.com/paritytech/substrate", branch = "master" } diff --git a/node/network/collator-protocol/src/collator_side/collation.rs b/node/network/collator-protocol/src/collator_side/collation.rs index 196f6f5102df..b520c9fda55d 100644 --- a/node/network/collator-protocol/src/collator_side/collation.rs +++ b/node/network/collator-protocol/src/collator_side/collation.rs @@ -74,7 +74,7 @@ pub struct WaitingCollationFetches { /// If set to `true`, any new request will be queued. 
pub collation_fetch_active: bool, /// The collation fetches waiting to be fulfilled. - pub waiting: VecDeque, + pub req_queue: VecDeque, /// All peers that are waiting or actively uploading. /// /// We will not accept multiple requests from the same peer, otherwise our DoS protection of diff --git a/node/network/collator-protocol/src/collator_side/metrics.rs b/node/network/collator-protocol/src/collator_side/metrics.rs index 99da64f13278..04a9806605ab 100644 --- a/node/network/collator-protocol/src/collator_side/metrics.rs +++ b/node/network/collator-protocol/src/collator_side/metrics.rs @@ -20,7 +20,7 @@ use polkadot_node_subsystem_util::metrics::{self, prometheus}; pub struct Metrics(Option); impl Metrics { - pub fn on_advertisment_made(&self) { + pub fn on_advertisement_made(&self) { if let Some(metrics) = &self.0 { metrics.advertisements_made.inc(); } diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index 97840dacbd79..b2be7b3717ee 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -16,16 +16,17 @@ use std::{ collections::{HashMap, HashSet}, + convert::TryInto, time::Duration, }; -use bitvec::prelude::*; +use bitvec::{bitvec, vec::BitVec}; use futures::{channel::oneshot, pin_mut, select, FutureExt, StreamExt}; use sp_core::Pair; use polkadot_node_network_protocol::{ self as net_protocol, - peer_set::PeerSet, + peer_set::{CollationVersion, PeerSet}, request_response::{ incoming::{self, OutgoingResponse}, v1 as request_v1, vstaging as request_vstaging, IncomingRequestReceiver, @@ -92,7 +93,7 @@ struct ValidatorGroup { /// Bits indicating which validators have already seen the announcement /// per candidate. 
- advertised_to: HashMap>, + advertised_to: HashMap, } impl ValidatorGroup { @@ -146,13 +147,21 @@ impl ValidatorGroup { }; self.advertised_to .entry(*candidate_hash) - .or_insert_with(|| bitvec![u8, Lsb0; 0; self.validators.len()]) + .or_insert_with(|| bitvec![0; self.validators.len()]) .set(validator_index, true); } } } } +#[derive(Debug)] +struct PeerData { + /// Peer's view. + view: View, + /// Network protocol version. + version: CollationVersion, +} + struct PerRelayParent { prospective_parachains_mode: ProspectiveParachainsMode, /// Validators group responsible for backing candidates built @@ -185,7 +194,7 @@ struct State { /// Track all active peers and their views /// to determine what is relevant to them. - peer_views: HashMap, + peer_data: HashMap, /// Leaves that do support asynchronous backing along with /// implicit ancestry. Leaves from the implicit view are present in @@ -241,7 +250,7 @@ impl State { collator_pair, metrics, collating_on: Default::default(), - peer_views: Default::default(), + peer_data: Default::default(), implicit_view: Default::default(), active_leaves: Default::default(), per_relay_parent: Default::default(), @@ -384,29 +393,29 @@ async fn distribute_collation( // It's collation-producer responsibility to verify that there exists // a hypothetical membership in a fragment tree for candidate. - let interested: Vec = state - .peer_views - .iter() - .filter(|(_, v)| match relay_parent_mode { - ProspectiveParachainsMode::Disabled => v.contains(&candidate_relay_parent), - ProspectiveParachainsMode::Enabled => v.iter().any(|block_hash| { - state - .implicit_view - .known_allowed_relay_parents_under(block_hash, Some(id)) - .unwrap_or_default() - .contains(&candidate_relay_parent) - }), - }) - .map(|(peer, _)| *peer) - .collect(); + let interested = + state + .peer_data + .iter() + .filter(|(_, PeerData { view: v, .. 
})| match relay_parent_mode { + ProspectiveParachainsMode::Disabled => v.contains(&candidate_relay_parent), + ProspectiveParachainsMode::Enabled => v.iter().any(|block_hash| { + state + .implicit_view + .known_allowed_relay_parents_under(block_hash, Some(id)) + .unwrap_or_default() + .contains(&candidate_relay_parent) + }), + }); // Make sure already connected peers get collations: - for peer_id in interested { + for (peer_id, peer_data) in interested { advertise_collation( ctx, candidate_relay_parent, per_relay_parent, - &peer_id, + peer_id, + peer_data.version, &state.peer_ids, &state.metrics, ) @@ -491,23 +500,48 @@ async fn determine_our_validators( Ok(current_validators) } +fn declare_message( + state: &mut State, + version: CollationVersion, +) -> Option> { + let para_id = state.collating_on?; + Some(match version { + CollationVersion::V1 => { + let declare_signature_payload = + protocol_v1::declare_signature_payload(&state.local_peer_id); + let wire_message = protocol_v1::CollatorProtocolMessage::Declare( + state.collator_pair.public(), + para_id, + state.collator_pair.sign(&declare_signature_payload), + ); + Versioned::V1(protocol_v1::CollationProtocol::CollatorProtocol(wire_message)) + }, + CollationVersion::VStaging => { + let declare_signature_payload = + protocol_vstaging::declare_signature_payload(&state.local_peer_id); + let wire_message = protocol_vstaging::CollatorProtocolMessage::Declare( + state.collator_pair.public(), + para_id, + state.collator_pair.sign(&declare_signature_payload), + ); + Versioned::VStaging(protocol_vstaging::CollationProtocol::CollatorProtocol( + wire_message, + )) + }, + }) +} + /// Issue a `Declare` collation message to the given `peer`. 
#[overseer::contextbounds(CollatorProtocol, prefix = self::overseer)] -async fn declare(ctx: &mut Context, state: &mut State, peer: PeerId) { - let declare_signature_payload = protocol_v1::declare_signature_payload(&state.local_peer_id); - - if let Some(para_id) = state.collating_on { - let wire_message = protocol_v1::CollatorProtocolMessage::Declare( - state.collator_pair.public(), - para_id, - state.collator_pair.sign(&declare_signature_payload), - ); - - ctx.send_message(NetworkBridgeTxMessage::SendCollationMessage( - vec![peer], - Versioned::V1(protocol_v1::CollationProtocol::CollatorProtocol(wire_message)), - )) - .await; +async fn declare( + ctx: &mut Context, + state: &mut State, + peer: &PeerId, + version: CollationVersion, +) { + if let Some(wire_message) = declare_message(state, version) { + ctx.send_message(NetworkBridgeTxMessage::SendCollationMessage(vec![*peer], wire_message)) + .await; } } @@ -539,6 +573,7 @@ async fn advertise_collation( relay_parent: Hash, per_relay_parent: &mut PerRelayParent, peer: &PeerId, + protocol_version: CollationVersion, peer_ids: &HashMap>, metrics: &Metrics, ) { @@ -566,8 +601,8 @@ async fn advertise_collation( ); collation.status.advance_to_advertised(); - let collation_message = match per_relay_parent.prospective_parachains_mode { - ProspectiveParachainsMode::Enabled => { + let collation_message = match protocol_version { + CollationVersion::VStaging => { let wire_message = protocol_vstaging::CollatorProtocolMessage::AdvertiseCollation { relay_parent, candidate_hash: *candidate_hash, @@ -577,7 +612,16 @@ async fn advertise_collation( wire_message, )) }, - ProspectiveParachainsMode::Disabled => { + CollationVersion::V1 => { + if per_relay_parent.prospective_parachains_mode.is_enabled() { + gum::warn!( + target: LOG_TARGET, + ?relay_parent, + peer_id = %peer, + "Skipping advertisement to validator, incorrect network protocol version", + ); + return + } let wire_message = 
protocol_v1::CollatorProtocolMessage::AdvertiseCollation(relay_parent); Versioned::V1(protocol_v1::CollationProtocol::CollatorProtocol(wire_message)) @@ -594,7 +638,7 @@ async fn advertise_collation( .validator_group .advertised_to_peer(candidate_hash, &peer_ids, peer); - metrics.on_advertisment_made(); + metrics.on_advertisement_made(); } } @@ -879,7 +923,7 @@ async fn handle_incoming_request( } if waiting.collation_fetch_active { - waiting.waiting.push_back(req); + waiting.req_queue.push_back(req); } else { waiting.collation_fetch_active = true; // Obtain a timer for sending collation @@ -914,7 +958,10 @@ async fn handle_peer_view_change( peer_id: PeerId, view: View, ) { - let current = state.peer_views.entry(peer_id.clone()).or_default(); + let PeerData { view: current, version } = match state.peer_data.get_mut(&peer_id) { + Some(peer_data) => peer_data, + None => return, + }; let added: Vec = view.difference(&*current).cloned().collect(); @@ -952,6 +999,7 @@ async fn handle_peer_view_change( *block_hash, per_relay_parent, &peer_id, + *version, &state.peer_ids, &state.metrics, ) @@ -971,10 +1019,30 @@ async fn handle_network_msg( use NetworkBridgeEvent::*; match bridge_message { - PeerConnected(peer_id, observed_role, _, maybe_authority) => { + PeerConnected(peer_id, observed_role, protocol_version, maybe_authority) => { // If it is possible that a disconnected validator would attempt a reconnect // it should be handled here. gum::trace!(target: LOG_TARGET, ?peer_id, ?observed_role, "Peer connected"); + + let version = match protocol_version.try_into() { + Ok(version) => version, + Err(err) => { + // Network bridge is expected to handle this. 
+ gum::error!( + target: LOG_TARGET, + ?peer_id, + ?observed_role, + ?err, + "Unsupported protocol version" + ); + return Ok(()) + }, + }; + state + .peer_data + .entry(peer_id) + .or_insert_with(|| PeerData { view: View::default(), version }); + if let Some(authority_ids) = maybe_authority { gum::trace!( target: LOG_TARGET, @@ -984,7 +1052,7 @@ async fn handle_network_msg( ); state.peer_ids.insert(peer_id, authority_ids); - declare(ctx, state, peer_id).await; + declare(ctx, state, &peer_id, version).await; } }, PeerViewChange(peer_id, view) => { @@ -993,7 +1061,7 @@ async fn handle_network_msg( }, PeerDisconnected(peer_id) => { gum::trace!(target: LOG_TARGET, ?peer_id, "Peer disconnected"); - state.peer_views.remove(&peer_id); + state.peer_data.remove(&peer_id); state.peer_ids.remove(&peer_id); }, OurViewChange(view) => { @@ -1151,13 +1219,12 @@ pub(crate) async fn run( "Sending collation to validator timed out, carrying on with next validator." ); // Drop all requests from slow peer. - waiting.waiting_peers.retain(|(waiting_peer_id, ..)| *waiting_peer_id != peer_id); - waiting.waiting.retain(|req| req.peer_id() != peer_id); + waiting.req_queue.retain(|req| req.peer_id() != peer_id); } else { waiting.waiting_peers.remove(&(peer_id, candidate_hash)); } - if let Some(next) = waiting.waiting.pop_front() { + if let Some(next) = waiting.req_queue.pop_front() { next } else { waiting.collation_fetch_active = false; diff --git a/node/network/collator-protocol/src/collator_side/tests/mod.rs b/node/network/collator-protocol/src/collator_side/tests/mod.rs index b79769320079..1ad9baa565ba 100644 --- a/node/network/collator-protocol/src/collator_side/tests/mod.rs +++ b/node/network/collator-protocol/src/collator_side/tests/mod.rs @@ -461,6 +461,7 @@ async fn distribute_collation( async fn connect_peer( virtual_overseer: &mut VirtualOverseer, peer: PeerId, + version: CollationVersion, authority_id: Option, ) { overseer_send( @@ -468,7 +469,7 @@ async fn connect_peer( 
CollatorProtocolMessage::NetworkBridgeUpdate(NetworkBridgeEvent::PeerConnected( peer.clone(), polkadot_node_network_protocol::ObservedRole::Authority, - CollationVersion::V1.into(), + version.into(), authority_id.map(|v| HashSet::from([v])), )), ) @@ -529,7 +530,7 @@ async fn expect_declare_msg( /// Check that the next received message is a collation advertisement message. /// -/// Expects vstaging message if `expected_candidate_hash` is `Some`, v1 otherwise. +/// Expects vstaging message if `expected_candidate_hashes` is `Some`, v1 otherwise. async fn expect_advertise_collation_msg( virtual_overseer: &mut VirtualOverseer, peer: &PeerId, @@ -632,7 +633,13 @@ fn advertise_and_send_collation() { .into_iter() .zip(test_state.current_group_validator_peer_ids()) { - connect_peer(&mut virtual_overseer, peer.clone(), Some(val.clone())).await; + connect_peer( + &mut virtual_overseer, + peer.clone(), + CollationVersion::V1, + Some(val.clone()), + ) + .await; } // We declare to the connected validators that we are a collator. @@ -768,6 +775,89 @@ fn advertise_and_send_collation() { }); } +/// Tests that collator side works with vstaging network protocol +/// before async backing is enabled. +#[test] +fn advertise_and_send_collation_vstaging_protocol() { + let test_state = TestState::default(); + let local_peer_id = test_state.local_peer_id.clone(); + let collator_pair = test_state.collator_pair.clone(); + + test_harness(local_peer_id, collator_pair, |mut test_harness| async move { + let virtual_overseer = &mut test_harness.virtual_overseer; + + setup_system(virtual_overseer, &test_state).await; + + let DistributeCollation { candidate, .. 
} = distribute_collation( + virtual_overseer, + &test_state, + test_state.relay_parent, + &test_state.group_rotation_info, + true, + ) + .await; + + let validators = test_state.current_group_validator_authority_ids(); + assert!(validators.len() >= 2); + let peer_ids = test_state.current_group_validator_peer_ids(); + + // Connect first peer with v1. + connect_peer( + virtual_overseer, + peer_ids[0], + CollationVersion::V1, + Some(validators[0].clone()), + ) + .await; + // The rest with vstaging. + for (val, peer) in validators.iter().zip(peer_ids.iter()).skip(1) { + connect_peer( + virtual_overseer, + peer.clone(), + CollationVersion::VStaging, + Some(val.clone()), + ) + .await; + } + + // Declare messages. + expect_declare_msg(virtual_overseer, &test_state, &peer_ids[0]).await; + for peer_id in peer_ids.iter().skip(1) { + prospective_parachains::expect_declare_msg_vstaging( + virtual_overseer, + &test_state, + &peer_id, + ) + .await; + } + + // Send info about peers view. + for peer in peer_ids.iter() { + send_peer_view_change(virtual_overseer, peer, vec![test_state.relay_parent]).await; + } + + // Versioned advertisements work. + expect_advertise_collation_msg( + virtual_overseer, + &peer_ids[0], + test_state.relay_parent, + None, + ) + .await; + for peer_id in peer_ids.iter().skip(1) { + expect_advertise_collation_msg( + virtual_overseer, + peer_id, + test_state.relay_parent, + Some(vec![candidate.hash()]), // This is `Some`, advertisement is vstaging. 
+ ) + .await; + } + + test_harness + }); +} + #[test] fn send_only_one_collation_per_relay_parent_at_a_time() { test_validator_send_sequence(|mut second_response_receiver, feedback_first_tx| async move { @@ -804,7 +894,13 @@ fn collators_declare_to_connected_peers() { setup_system(&mut test_harness.virtual_overseer, &test_state).await; // A validator connected to us - connect_peer(&mut test_harness.virtual_overseer, peer.clone(), Some(validator_id)).await; + connect_peer( + &mut test_harness.virtual_overseer, + peer.clone(), + CollationVersion::V1, + Some(validator_id), + ) + .await; expect_declare_msg(&mut test_harness.virtual_overseer, &test_state, &peer).await; test_harness }) @@ -828,10 +924,12 @@ fn collations_are_only_advertised_to_validators_with_correct_view() { setup_system(virtual_overseer, &test_state).await; // A validator connected to us - connect_peer(virtual_overseer, peer.clone(), Some(validator_id)).await; + connect_peer(virtual_overseer, peer.clone(), CollationVersion::V1, Some(validator_id)) + .await; // Connect the second validator - connect_peer(virtual_overseer, peer2.clone(), Some(validator_id2)).await; + connect_peer(virtual_overseer, peer2.clone(), CollationVersion::V1, Some(validator_id2)) + .await; expect_declare_msg(virtual_overseer, &test_state, &peer).await; expect_declare_msg(virtual_overseer, &test_state, &peer2).await; @@ -879,10 +977,12 @@ fn collate_on_two_different_relay_chain_blocks() { setup_system(virtual_overseer, &test_state).await; // A validator connected to us - connect_peer(virtual_overseer, peer.clone(), Some(validator_id)).await; + connect_peer(virtual_overseer, peer.clone(), CollationVersion::V1, Some(validator_id)) + .await; // Connect the second validator - connect_peer(virtual_overseer, peer2.clone(), Some(validator_id2)).await; + connect_peer(virtual_overseer, peer2.clone(), CollationVersion::V1, Some(validator_id2)) + .await; expect_declare_msg(virtual_overseer, &test_state, &peer).await; 
expect_declare_msg(virtual_overseer, &test_state, &peer2).await; @@ -936,7 +1036,13 @@ fn validator_reconnect_does_not_advertise_a_second_time() { setup_system(virtual_overseer, &test_state).await; // A validator connected to us - connect_peer(virtual_overseer, peer.clone(), Some(validator_id.clone())).await; + connect_peer( + virtual_overseer, + peer.clone(), + CollationVersion::V1, + Some(validator_id.clone()), + ) + .await; expect_declare_msg(virtual_overseer, &test_state, &peer).await; distribute_collation( @@ -954,7 +1060,8 @@ fn validator_reconnect_does_not_advertise_a_second_time() { // Disconnect and reconnect directly disconnect_peer(virtual_overseer, peer.clone()).await; - connect_peer(virtual_overseer, peer.clone(), Some(validator_id)).await; + connect_peer(virtual_overseer, peer.clone(), CollationVersion::V1, Some(validator_id)) + .await; expect_declare_msg(virtual_overseer, &test_state, &peer).await; send_peer_view_change(virtual_overseer, &peer, vec![test_state.relay_parent]).await; @@ -980,7 +1087,8 @@ fn collators_reject_declare_messages() { setup_system(virtual_overseer, &test_state).await; // A validator connected to us - connect_peer(virtual_overseer, peer.clone(), Some(validator_id)).await; + connect_peer(virtual_overseer, peer.clone(), CollationVersion::V1, Some(validator_id)) + .await; expect_declare_msg(virtual_overseer, &test_state, &peer).await; overseer_send( @@ -1045,7 +1153,8 @@ where .into_iter() .zip(test_state.current_group_validator_peer_ids()) { - connect_peer(virtual_overseer, peer.clone(), Some(val.clone())).await; + connect_peer(virtual_overseer, peer.clone(), CollationVersion::V1, Some(val.clone())) + .await; } // We declare to the connected validators that we are a collator. 
diff --git a/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs b/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs index 3e00ddabb298..f22e70598301 100644 --- a/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs +++ b/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs @@ -129,6 +129,40 @@ async fn update_view( } } +/// Check that the next received message is a `Declare` message. +pub(super) async fn expect_declare_msg_vstaging( + virtual_overseer: &mut VirtualOverseer, + test_state: &TestState, + peer: &PeerId, +) { + assert_matches!( + overseer_recv(virtual_overseer).await, + AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::SendCollationMessage( + to, + Versioned::VStaging(protocol_vstaging::CollationProtocol::CollatorProtocol( + wire_message, + )), + )) => { + assert_eq!(to[0], *peer); + assert_matches!( + wire_message, + protocol_vstaging::CollatorProtocolMessage::Declare( + collator_id, + para_id, + signature, + ) => { + assert!(signature.verify( + &*protocol_vstaging::declare_signature_payload(&test_state.local_peer_id), + &collator_id), + ); + assert_eq!(collator_id, test_state.collator_pair.public()); + assert_eq!(para_id, test_state.para_id); + } + ); + } + ); +} + /// Test that a collator distributes a collation from the allowed ancestry /// to correct validators group. #[test] @@ -171,12 +205,18 @@ fn distribute_collation_from_implicit_view() { .into_iter() .zip(validator_peer_ids.clone()) { - connect_peer(virtual_overseer, peer.clone(), Some(val.clone())).await; + connect_peer( + virtual_overseer, + peer.clone(), + CollationVersion::VStaging, + Some(val.clone()), + ) + .await; } // Collator declared itself to each peer. 
for peer_id in &validator_peer_ids { - expect_declare_msg(virtual_overseer, &test_state, peer_id).await; + expect_declare_msg_vstaging(virtual_overseer, &test_state, peer_id).await; } let pov = PoV { block_data: BlockData(vec![1, 2, 3]) }; @@ -391,8 +431,14 @@ fn advertise_and_send_collation_by_hash() { let peer = test_state.validator_peer_id[0].clone(); let validator_id = test_state.current_group_validator_authority_ids()[0].clone(); - connect_peer(&mut virtual_overseer, peer.clone(), Some(validator_id.clone())).await; - expect_declare_msg(&mut virtual_overseer, &test_state, &peer).await; + connect_peer( + &mut virtual_overseer, + peer.clone(), + CollationVersion::VStaging, + Some(validator_id.clone()), + ) + .await; + expect_declare_msg_vstaging(&mut virtual_overseer, &test_state, &peer).await; // Head `b` is not a leaf, but both advertisements are still relevant. send_peer_view_change(&mut virtual_overseer, &peer, vec![head_b]).await; From 16643c6016c49e1f396c127abfa13c412539485a Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Tue, 13 Sep 2022 23:41:18 +0300 Subject: [PATCH 23/45] cleanup --- node/network/collator-protocol/src/error.rs | 3 - .../src/validator_side/mod.rs | 240 ++++++++---------- 2 files changed, 105 insertions(+), 138 deletions(-) diff --git a/node/network/collator-protocol/src/error.rs b/node/network/collator-protocol/src/error.rs index 3beb5e057d91..be3ccbc2c27a 100644 --- a/node/network/collator-protocol/src/error.rs +++ b/node/network/collator-protocol/src/error.rs @@ -52,9 +52,6 @@ pub enum Error { #[error(transparent)] ImplicitViewFetchError(backing_implicit_view::FetchError), - #[error("Response receiver for hypothetical depth request cancelled")] - CancelledGetHypotheticalDepth(oneshot::Canceled), - #[error("Response receiver for Runtime API version request cancelled")] CancelledRuntimeApiVersion(oneshot::Canceled), diff --git a/node/network/collator-protocol/src/validator_side/mod.rs 
b/node/network/collator-protocol/src/validator_side/mod.rs index 08f282f9ec28..57118ec67b57 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -151,7 +151,7 @@ enum PeerState { } #[derive(Debug)] -enum AdvertisementError { +enum InsertAdvertisementError { /// Advertisement is already known. Duplicate, /// Collation relay parent is out of our view. @@ -159,7 +159,7 @@ enum AdvertisementError { /// No prior declare message received. UndeclaredCollator, /// A limit for announcements per peer is reached. - LimitReached, + PeerLimitReached, /// Mismatch of relay parent mode and advertisement arguments. /// An internal error that should not happen. InvalidArguments, @@ -246,9 +246,9 @@ impl PeerData { candidate_hash: Option, implicit_view: &ImplicitView, active_leaves: &HashMap, - ) -> std::result::Result<(CollatorId, ParaId), AdvertisementError> { + ) -> std::result::Result<(CollatorId, ParaId), InsertAdvertisementError> { match self.state { - PeerState::Connected(_) => Err(AdvertisementError::UndeclaredCollator), + PeerState::Connected(_) => Err(InsertAdvertisementError::UndeclaredCollator), PeerState::Collating(ref mut state) => { if !is_relay_parent_in_implicit_view( &on_relay_parent, @@ -257,13 +257,13 @@ impl PeerData { active_leaves, state.para_id, ) { - return Err(AdvertisementError::OutOfOurView) + return Err(InsertAdvertisementError::OutOfOurView) } match (relay_parent_mode, candidate_hash) { (ProspectiveParachainsMode::Disabled, None) => { if state.advertisements.contains_key(&on_relay_parent) { - return Err(AdvertisementError::Duplicate) + return Err(InsertAdvertisementError::Duplicate) } state.advertisements.insert(on_relay_parent, HashSet::new()); }, @@ -273,16 +273,16 @@ impl PeerData { .get(&on_relay_parent) .map_or(false, |candidates| candidates.contains(&candidate_hash)) { - return Err(AdvertisementError::Duplicate) + return Err(InsertAdvertisementError::Duplicate) } 
let candidates = state.advertisements.entry(on_relay_parent).or_default(); if candidates.len() >= MAX_CANDIDATE_DEPTH + 1 { - return Err(AdvertisementError::LimitReached) + return Err(InsertAdvertisementError::PeerLimitReached) } candidates.insert(candidate_hash); }, - _ => return Err(AdvertisementError::InvalidArguments), + _ => return Err(InsertAdvertisementError::InvalidArguments), } state.last_active = Instant::now(); @@ -893,20 +893,48 @@ async fn process_incoming_peer_message( } }, Versioned::V1(V1::AdvertiseCollation(relay_parent)) => - handle_advertisement(ctx.sender(), state, relay_parent, &origin, None).await, + if let Err(err) = + handle_advertisement(ctx.sender(), state, relay_parent, &origin, None).await + { + gum::debug!( + target: LOG_TARGET, + peer_id = ?origin, + ?relay_parent, + error = ?err, + "Rejected v1 advertisement", + ); + + if let Some(rep) = err.reputation_changes() { + modify_reputation(ctx.sender(), origin.clone(), rep).await; + } + }, Versioned::VStaging(VStaging::AdvertiseCollation { relay_parent, candidate_hash, parent_head_data_hash, }) => - handle_advertisement( + if let Err(err) = handle_advertisement( ctx.sender(), state, relay_parent, &origin, Some((candidate_hash, parent_head_data_hash)), ) - .await, + .await + { + gum::debug!( + target: LOG_TARGET, + peer_id = ?origin, + ?relay_parent, + ?candidate_hash, + error = ?err, + "Rejected vstaging advertisement", + ); + + if let Some(rep) = err.reputation_changes() { + modify_reputation(ctx.sender(), origin.clone(), rep).await; + } + }, Versioned::V1(V1::CollationSeconded(..)) | Versioned::VStaging(VStaging::CollationSeconded(..)) => { gum::warn!( @@ -927,7 +955,7 @@ async fn is_seconding_allowed( parent_head_data_hash: Hash, para_id: ParaId, active_leaves: impl IntoIterator, -) -> Result +) -> Option where Sender: CollatorProtocolSenderTrait, { @@ -946,14 +974,51 @@ where .send_message(ProspectiveParachainsMessage::GetHypotheticalDepth(request, tx)) .await; - let response = 
rx.await.map_err(Error::CancelledGetHypotheticalDepth)?; + let response = rx.await.ok()?; if !response.is_empty() { - return Ok(true) + return Some(true) } } - Ok(false) + Some(false) +} + +#[derive(Debug)] +enum AdvertisementError { + /// Relay parent is unknown. + RelayParentOutOfView, + /// Peer is not present in the subsystem state. + UnknownPeer, + /// Peer has not declared its para id. + UndeclaredCollator, + /// We're assigned to a different para at the given relay parent. + InvalidAssignment, + /// Collator is trying to build on top of occupied core. + CoreOccupied, + /// An advertisement format doesn't match the relay parent. + ProtocolMismatch, + /// Para reached a limit of seconded candidates for this relay parent. + SecondedLimitReached, + /// Failed to insert an advertisement. + FailedToInsert(InsertAdvertisementError), + /// Failed to query prospective parachains subsystem. + ProspectiveParachainsUnavailable, +} + +impl AdvertisementError { + fn reputation_changes(&self) -> Option { + use AdvertisementError::*; + match self { + InvalidAssignment => Some(COST_WRONG_PARA), + RelayParentOutOfView | UndeclaredCollator | CoreOccupied | FailedToInsert(_) => + Some(COST_UNEXPECTED_MESSAGE), + UnknownPeer | + ProtocolMismatch | + SecondedLimitReached | + ProspectiveParachainsUnavailable => None, + } + } } async fn handle_advertisement( @@ -962,7 +1027,8 @@ async fn handle_advertisement( relay_parent: Hash, peer_id: &PeerId, prospective_candidate: Option<(CandidateHash, Hash)>, -) where +) -> std::result::Result<(), AdvertisementError> +where Sender: CollatorProtocolSenderTrait, { let _span = state @@ -970,81 +1036,24 @@ async fn handle_advertisement( .get(&relay_parent) .map(|s| s.child("advertise-collation")); - // First, perform validity checks: - // - Relay parent is known - // - Peer is declared - // - Para id is indeed the one we're assigned to at the given relay parent - // - Collator is not trying to build on top of occupied core (unless async - // 
backing is enabled) + let per_relay_parent = state + .per_relay_parent + .get_mut(&relay_parent) + .ok_or(AdvertisementError::RelayParentOutOfView)?; - let per_relay_parent = match state.per_relay_parent.get_mut(&relay_parent) { - Some(state) => state, - None => { - gum::debug!( - target: LOG_TARGET, - peer_id = ?peer_id, - ?relay_parent, - "Advertise collation out of view", - ); - - modify_reputation(sender, *peer_id, COST_UNEXPECTED_MESSAGE).await; - return - }, - }; let relay_parent_mode = per_relay_parent.prospective_parachains_mode; let assignment = per_relay_parent.assignment; - let peer_data = match state.peer_data.get_mut(&peer_id) { - None => { - gum::debug!( - target: LOG_TARGET, - peer_id = ?peer_id, - ?relay_parent, - "Advertise collation message has been received from an unknown peer", - ); - modify_reputation(sender, *peer_id, COST_UNEXPECTED_MESSAGE).await; - return - }, - Some(p) => p, - }; - - let para_id = if let Some(id) = peer_data.collating_para() { - id - } else { - gum::debug!( - target: LOG_TARGET, - peer_id = ?peer_id, - ?relay_parent, - "Advertise collation message received from undeclared peer", - ); - modify_reputation(sender, *peer_id, COST_UNEXPECTED_MESSAGE).await; - return - }; + let peer_data = state.peer_data.get_mut(&peer_id).ok_or(AdvertisementError::UnknownPeer)?; + let para_id = peer_data.collating_para().ok_or(AdvertisementError::UndeclaredCollator)?; let core_state = match assignment.current { Some((id, core_state)) if id == para_id => core_state, - _ => { - gum::debug!( - target: LOG_TARGET, - peer_id = ?peer_id, - para_id = ?para_id, - ?relay_parent, - "Advertise collation message for para we're no assigned to", - ); - modify_reputation(sender, *peer_id, COST_UNEXPECTED_MESSAGE).await; - return - }, + _ => return Err(AdvertisementError::InvalidAssignment), }; if !relay_parent_mode.is_enabled() && core_state.is_occupied() { - gum::debug!( - target: LOG_TARGET, - peer_id = ?peer_id, - ?relay_parent, - "Advertise collation 
message for an occupied core (async backing disabled)", - ); - modify_reputation(sender, *peer_id, COST_UNEXPECTED_MESSAGE).await; - return + return Err(AdvertisementError::CoreOccupied) } // TODO: only fetch a collation if it's built on top of backed nodes in fragment tree. @@ -1062,33 +1071,14 @@ async fn handle_advertisement( active_leaves, ) .await - .unwrap_or_else(|err| { - gum::warn!( - target: LOG_TARGET, - ?relay_parent, - ?para_id, - ?candidate_hash, - ?relay_parent_mode, - error = %err, - "Failed to query prospective parachains subsystem", - ); - false - }) - }, - _ => { - gum::error!( - target: LOG_TARGET, - peer_id = ?peer_id, - ?relay_parent, - relay_parent_mode = ?relay_parent_mode, - "Invalid arguments for advertisement", - ); - return + .ok_or(AdvertisementError::ProspectiveParachainsUnavailable)? }, + _ => return Err(AdvertisementError::ProtocolMismatch), }; if !is_seconding_allowed { - return + // TODO + return Ok(()) } let candidate_hash = prospective_candidate.map(|(hash, ..)| hash); @@ -1114,21 +1104,14 @@ async fn handle_advertisement( ProspectiveCandidate { candidate_hash, parent_head_data_hash } }); - let pending_collation = - PendingCollation::new(relay_parent, para_id, peer_id, prospective_candidate); - let collations = &mut per_relay_parent.collations; if !collations.is_seconded_limit_reached(relay_parent_mode) { - gum::debug!( - target: LOG_TARGET, - peer_id = ?peer_id, - para_id = ?para_id, - ?relay_parent, - "Seconded collations limit reached", - ); - return + return Err(AdvertisementError::SecondedLimitReached) } + let pending_collation = + PendingCollation::new(relay_parent, para_id, peer_id, prospective_candidate); + match collations.status { CollationStatus::Fetching | CollationStatus::WaitingOnValidation => { gum::trace!( @@ -1159,27 +1142,14 @@ async fn handle_advertisement( }, } }, - Err(AdvertisementError::InvalidArguments) => { - gum::warn!( - target: LOG_TARGET, - peer_id = ?peer_id, - ?relay_parent, - relay_parent_mode = 
?relay_parent_mode, - "Relay parent mode mismatch", - ); - }, - Err(error) => { - gum::debug!( - target: LOG_TARGET, - peer_id = ?peer_id, - ?relay_parent, - ?error, - "Invalid advertisement", - ); - - modify_reputation(sender, *peer_id, COST_UNEXPECTED_MESSAGE).await; + Err(InsertAdvertisementError::InvalidArguments) => { + // Checked above. + return Err(AdvertisementError::ProtocolMismatch) }, + Err(error) => return Err(AdvertisementError::FailedToInsert(error)), } + + Ok(()) } /// Our view has changed. From 32b667499acd44184cce9f4d98a4ffe609d3fdeb Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Tue, 13 Sep 2022 23:53:48 +0300 Subject: [PATCH 24/45] merge artifacts --- node/network/protocol/src/lib.rs | 3 +-- node/network/protocol/src/peer_set.rs | 7 ------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/node/network/protocol/src/lib.rs b/node/network/protocol/src/lib.rs index a0da0790e562..50a15f84622e 100644 --- a/node/network/protocol/src/lib.rs +++ b/node/network/protocol/src/lib.rs @@ -19,8 +19,7 @@ #![deny(unused_crate_dependencies)] #![warn(missing_docs)] -#[doc(hidden)] -pub use parity_scale_codec::{Decode, Encode}; +use parity_scale_codec::{Decode, Encode}; use polkadot_primitives::v2::{BlockNumber, Hash}; use std::{collections::HashMap, fmt}; diff --git a/node/network/protocol/src/peer_set.rs b/node/network/protocol/src/peer_set.rs index 6e570f6f0170..58fb058a803d 100644 --- a/node/network/protocol/src/peer_set.rs +++ b/node/network/protocol/src/peer_set.rs @@ -122,17 +122,10 @@ impl PeerSet { } #[cfg(feature = "network-protocol-staging")] - #[cfg(not(feature = "network-protocol-staging"))] match self { PeerSet::Validation => ValidationVersion::VStaging.into(), PeerSet::Collation => CollationVersion::VStaging.into(), } - - #[cfg(feature = "network-protocol-staging")] - match self { - PeerSet::Validation => VALIDATION_PROTOCOL_VSTAGING, - PeerSet::Collation => COLLATION_PROTOCOL_VSTAGING, - } } /// Get the max notification size for this 
peer set. From 23fc0a0a80807dbe516c3662c6205d95fc8e04d4 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Wed, 14 Sep 2022 23:08:00 +0300 Subject: [PATCH 25/45] Validator side net protocol versioning --- .../src/validator_side/mod.rs | 87 ++++++++++++------- 1 file changed, 58 insertions(+), 29 deletions(-) diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index 57118ec67b57..6a3910819e20 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -25,6 +25,7 @@ use futures::{ use futures_timer::Delay; use std::{ collections::{hash_map::Entry, HashMap, HashSet}, + convert::TryInto, task::Poll, time::{Duration, Instant}, }; @@ -33,7 +34,7 @@ use sp_keystore::SyncCryptoStorePtr; use polkadot_node_network_protocol::{ self as net_protocol, - peer_set::PeerSet, + peer_set::{CollationVersion, PeerSet}, request_response as req_res, request_response::{ outgoing::{Recipient, RequestError}, @@ -162,20 +163,17 @@ enum InsertAdvertisementError { PeerLimitReached, /// Mismatch of relay parent mode and advertisement arguments. /// An internal error that should not happen. - InvalidArguments, + ProtocolMismatch, } #[derive(Debug)] struct PeerData { view: View, state: PeerState, + version: CollationVersion, } impl PeerData { - fn new(view: View) -> Self { - PeerData { view, state: PeerState::Connected(Instant::now()) } - } - /// Update the view, clearing all advertisements that are no longer in the /// current view. 
fn update_view( @@ -282,7 +280,7 @@ impl PeerData { } candidates.insert(candidate_hash); }, - _ => return Err(InsertAdvertisementError::InvalidArguments), + _ => return Err(InsertAdvertisementError::ProtocolMismatch), } state.last_active = Instant::now(); @@ -357,12 +355,6 @@ impl PeerData { } } -impl Default for PeerData { - fn default() -> Self { - PeerData::new(Default::default()) - } -} - #[derive(Debug, Copy, Clone)] enum AssignedCoreState { Scheduled, @@ -665,16 +657,25 @@ async fn note_good_collation( async fn notify_collation_seconded( sender: &mut impl overseer::CollatorProtocolSenderTrait, peer_id: PeerId, + version: CollationVersion, relay_parent: Hash, statement: SignedFullStatement, ) { - let wire_message = - protocol_v1::CollatorProtocolMessage::CollationSeconded(relay_parent, statement.into()); + let statement = statement.into(); + let wire_message = match version { + CollationVersion::V1 => Versioned::V1(protocol_v1::CollationProtocol::CollatorProtocol( + protocol_v1::CollatorProtocolMessage::CollationSeconded(relay_parent, statement), + )), + CollationVersion::VStaging => + Versioned::VStaging(protocol_vstaging::CollationProtocol::CollatorProtocol( + protocol_vstaging::CollatorProtocolMessage::CollationSeconded( + relay_parent, + statement, + ), + )), + }; sender - .send_message(NetworkBridgeTxMessage::SendCollationMessage( - vec![peer_id], - Versioned::V1(protocol_v1::CollationProtocol::CollatorProtocol(wire_message)), - )) + .send_message(NetworkBridgeTxMessage::SendCollationMessage(vec![peer_id], wire_message)) .await; modify_reputation(sender, peer_id, BENEFIT_NOTIFY_GOOD).await; @@ -683,8 +684,11 @@ async fn notify_collation_seconded( /// A peer's view has changed. A number of things should be done: /// - Ongoing collation requests have to be canceled. /// - Advertisements by this peer that are no longer relevant have to be removed. 
-async fn handle_peer_view_change(state: &mut State, peer_id: PeerId, view: View) -> Result<()> { - let peer_data = state.peer_data.entry(peer_id.clone()).or_default(); +fn handle_peer_view_change(state: &mut State, peer_id: PeerId, view: View) { + let peer_data = match state.peer_data.get_mut(&peer_id) { + Some(peer_data) => peer_data, + None => return, + }; peer_data.update_view( &state.implicit_view, @@ -695,8 +699,6 @@ async fn handle_peer_view_change(state: &mut State, peer_id: PeerId, view: View) state .requested_collations .retain(|pc, _| pc.peer_id != peer_id || peer_data.has_advertised(&pc.relay_parent, None)); - - Ok(()) } /// Request a collation from the network. @@ -1059,7 +1061,7 @@ where // TODO: only fetch a collation if it's built on top of backed nodes in fragment tree. // https://github.com/paritytech/polkadot/issues/5923 let is_seconding_allowed = match (relay_parent_mode, prospective_candidate) { - (ProspectiveParachainsMode::Disabled, None) => true, + (ProspectiveParachainsMode::Disabled, _) => true, (ProspectiveParachainsMode::Enabled, Some((candidate_hash, parent_head_data_hash))) => { let active_leaves = state.active_leaves.keys().copied(); is_seconding_allowed( @@ -1142,7 +1144,7 @@ where }, } }, - Err(InsertAdvertisementError::InvalidArguments) => { + Err(InsertAdvertisementError::ProtocolMismatch) => { // Checked above. return Err(AdvertisementError::ProtocolMismatch) }, @@ -1278,8 +1280,26 @@ async fn handle_network_msg( use NetworkBridgeEvent::*; match bridge_message { - PeerConnected(peer_id, _role, _version, _) => { - state.peer_data.entry(peer_id).or_default(); + PeerConnected(peer_id, observed_role, protocol_version, _) => { + let version = match protocol_version.try_into() { + Ok(version) => version, + Err(err) => { + // Network bridge is expected to handle this. 
+ gum::error!( + target: LOG_TARGET, + ?peer_id, + ?observed_role, + ?err, + "Unsupported protocol version" + ); + return Ok(()) + }, + }; + state.peer_data.entry(peer_id).or_insert_with(|| PeerData { + view: View::default(), + state: PeerState::Connected(Instant::now()), + version, + }); state.metrics.note_collator_peer_count(state.peer_data.len()); }, PeerDisconnected(peer_id) => { @@ -1290,7 +1310,7 @@ async fn handle_network_msg( // impossible! }, PeerViewChange(peer_id, view) => { - handle_peer_view_change(state, peer_id, view).await?; + handle_peer_view_change(state, peer_id, view); }, OurViewChange(view) => { handle_our_view_change(ctx.sender(), state, keystore, view).await?; @@ -1354,7 +1374,16 @@ async fn process_msg( let (collator_id, pending_collation) = collation_event; let PendingCollation { relay_parent, peer_id, .. } = pending_collation; note_good_collation(ctx.sender(), &state.peer_data, collator_id.clone()).await; - notify_collation_seconded(ctx.sender(), peer_id, relay_parent, stmt).await; + if let Some(peer_data) = state.peer_data.get(&peer_id) { + notify_collation_seconded( + ctx.sender(), + peer_id, + peer_data.version, + relay_parent, + stmt, + ) + .await; + } if let Some(state) = state.per_relay_parent.get_mut(&parent) { state.collations.status = CollationStatus::Seconded; From c014585b9c39ee5e0a652c873c8bec3073ef7aa6 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Wed, 14 Sep 2022 23:16:54 +0300 Subject: [PATCH 26/45] Remove fragment tree membership request --- .../src/validator_side/mod.rs | 39 +++++-------------- 1 file changed, 9 insertions(+), 30 deletions(-) diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index 6a3910819e20..e109dc55e4a3 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -47,7 +47,7 @@ use polkadot_node_primitives::{PoV, SignedFullStatement, Statement}; 
use polkadot_node_subsystem::{ jaeger, messages::{ - CandidateBackingMessage, CollatorProtocolMessage, HypotheticalDepthRequest, IfDisconnected, + CandidateBackingMessage, CollatorProtocolMessage, IfDisconnected, NetworkBridgeEvent, NetworkBridgeTxMessage, ProspectiveParachainsMessage, ProspectiveValidationDataRequest, }, @@ -951,39 +951,18 @@ async fn process_incoming_peer_message( } async fn is_seconding_allowed( - sender: &mut Sender, - relay_parent: Hash, - candidate_hash: CandidateHash, - parent_head_data_hash: Hash, - para_id: ParaId, - active_leaves: impl IntoIterator, + _sender: &mut Sender, + _relay_parent: Hash, + _candidate_hash: CandidateHash, + _parent_head_data_hash: Hash, + _para_id: ParaId, + _active_leaves: impl IntoIterator, ) -> Option where Sender: CollatorProtocolSenderTrait, { - for leaf in active_leaves { - let (tx, rx) = oneshot::channel(); - - let request = HypotheticalDepthRequest { - candidate_hash, - candidate_para: para_id, - parent_head_data_hash, - candidate_relay_parent: relay_parent, - fragment_tree_relay_parent: leaf, - }; - - sender - .send_message(ProspectiveParachainsMessage::GetHypotheticalDepth(request, tx)) - .await; - - let response = rx.await.ok()?; - - if !response.is_empty() { - return Some(true) - } - } - - Some(false) + // TODO https://github.com/paritytech/polkadot/issues/5923 + Some(true) } #[derive(Debug)] From 81b957e69f81291575ee352f442558d43ce1a70e Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Thu, 15 Sep 2022 13:29:39 +0300 Subject: [PATCH 27/45] Resolve todo --- .../collator-protocol/src/validator_side/mod.rs | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index e109dc55e4a3..70994551a595 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -47,9 +47,8 @@ use 
polkadot_node_primitives::{PoV, SignedFullStatement, Statement}; use polkadot_node_subsystem::{ jaeger, messages::{ - CandidateBackingMessage, CollatorProtocolMessage, IfDisconnected, - NetworkBridgeEvent, NetworkBridgeTxMessage, ProspectiveParachainsMessage, - ProspectiveValidationDataRequest, + CandidateBackingMessage, CollatorProtocolMessage, IfDisconnected, NetworkBridgeEvent, + NetworkBridgeTxMessage, ProspectiveParachainsMessage, ProspectiveValidationDataRequest, }, overseer, CollatorProtocolSenderTrait, FromOrchestra, OverseerSignal, PerLeafSpan, }; @@ -1576,15 +1575,8 @@ where { let (tx, rx) = oneshot::channel(); - let request = ProspectiveValidationDataRequest { - para_id, - candidate_relay_parent, - parent_head_data_hash, - // TODO [now]: max pov size should be from runtime - // configuration at candidate relay parent. - // Where do we fetch it from? - max_pov_size: todo!(), - }; + let request = + ProspectiveValidationDataRequest { para_id, candidate_relay_parent, parent_head_data_hash }; sender .send_message(ProspectiveParachainsMessage::GetProspectiveValidationData(request, tx)) From 91961c7aefae493cd8be97ab425abb09f1e3df26 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Thu, 15 Sep 2022 15:49:07 +0300 Subject: [PATCH 28/45] Collator side core state test --- .../src/collator_side/tests/mod.rs | 2 +- .../tests/prospective_parachains.rs | 85 ++++++++++++++++++- 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/node/network/collator-protocol/src/collator_side/tests/mod.rs b/node/network/collator-protocol/src/collator_side/tests/mod.rs index 1ad9baa565ba..6b252fa2efae 100644 --- a/node/network/collator-protocol/src/collator_side/tests/mod.rs +++ b/node/network/collator-protocol/src/collator_side/tests/mod.rs @@ -778,7 +778,7 @@ fn advertise_and_send_collation() { /// Tests that collator side works with vstaging network protocol /// before async backing is enabled. 
#[test] -fn advertise_and_send_collation_vstaging_protocol() { +fn advertise_collation_vstaging_protocol() { let test_state = TestState::default(); let local_peer_id = test_state.local_peer_id.clone(); let collator_pair = test_state.collator_pair.clone(); diff --git a/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs b/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs index f22e70598301..67cdef1fed34 100644 --- a/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs +++ b/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs @@ -19,7 +19,7 @@ use super::*; use polkadot_node_subsystem::messages::{ChainApiMessage, ProspectiveParachainsMessage}; -use polkadot_primitives::v2::Header; +use polkadot_primitives::v2::{Header, OccupiedCore}; const API_VERSION_PROSPECTIVE_ENABLED: u32 = 3; @@ -483,3 +483,86 @@ fn advertise_and_send_collation_by_hash() { TestHarness { virtual_overseer, req_v1_cfg, req_vstaging_cfg } }) } + +/// Tests that collator distributes collation built on top of occupied core. +#[test] +fn advertise_core_occupied() { + let mut test_state = TestState::default(); + let candidate = + TestCandidateBuilder { para_id: test_state.para_id, ..Default::default() }.build(); + test_state.availability_core = CoreState::Occupied(OccupiedCore { + next_up_on_available: None, + occupied_since: 0, + time_out_at: 0, + next_up_on_time_out: None, + availability: BitVec::default(), + group_responsible: GroupIndex(0), + candidate_hash: candidate.hash(), + candidate_descriptor: candidate.descriptor, + }); + + let local_peer_id = test_state.local_peer_id; + let collator_pair = test_state.collator_pair.clone(); + + test_harness(local_peer_id, collator_pair, |mut test_harness| async move { + let virtual_overseer = &mut test_harness.virtual_overseer; + + let head_a = Hash::from_low_u64_be(128); + let head_a_num: u32 = 64; + + // Grandparent of head `a`. 
+ let head_b = Hash::from_low_u64_be(130); + + // Set collating para id. + overseer_send(virtual_overseer, CollatorProtocolMessage::CollateOn(test_state.para_id)) + .await; + // Activated leaf is `a`, but the collation will be based on `b`. + update_view(virtual_overseer, &test_state, vec![(head_a, head_a_num)], 1).await; + + let pov = PoV { block_data: BlockData(vec![1, 2, 3]) }; + let candidate = TestCandidateBuilder { + para_id: test_state.para_id, + relay_parent: head_b, + pov_hash: pov.hash(), + ..Default::default() + } + .build(); + let candidate_hash = candidate.hash(); + distribute_collation_with_receipt( + virtual_overseer, + &test_state, + head_b, + &test_state.group_rotation_info, + true, + candidate, + pov, + Hash::zero(), + ) + .await; + + let validators = test_state.current_group_validator_authority_ids(); + let peer_ids = test_state.current_group_validator_peer_ids(); + + connect_peer( + virtual_overseer, + peer_ids[0], + CollationVersion::VStaging, + Some(validators[0].clone()), + ) + .await; + expect_declare_msg_vstaging(virtual_overseer, &test_state, &peer_ids[0]).await; + // Peer is aware of the leaf. + send_peer_view_change(virtual_overseer, &peer_ids[0], vec![head_a]).await; + + // Collation is advertised. 
+ expect_advertise_collation_msg( + virtual_overseer, + &peer_ids[0], + head_b, + Some(vec![candidate_hash]), + ) + .await; + + test_harness + }) +} From e647370e40ee5a3d8cb7fdd0070c167942cd1eea Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Thu, 15 Sep 2022 22:32:24 +0300 Subject: [PATCH 29/45] Improve net protocol compatibility --- .../src/collator_side/mod.rs | 23 +- .../src/validator_side/collation.rs | 2 +- .../src/validator_side/mod.rs | 95 +++--- .../validator_side/{tests.rs => tests/mod.rs} | 275 ++++++++++++++---- .../tests/prospective_parachains.rs | 164 +++++++++++ 5 files changed, 436 insertions(+), 123 deletions(-) rename node/network/collator-protocol/src/validator_side/{tests.rs => tests/mod.rs} (84%) create mode 100644 node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index b2be7b3717ee..833000837efe 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -55,7 +55,10 @@ use polkadot_primitives::v2::{ use super::{ prospective_parachains_mode, ProspectiveParachainsMode, LOG_TARGET, MAX_CANDIDATE_DEPTH, }; -use crate::error::{log_error, Error, FatalError, Result}; +use crate::{ + error::{log_error, Error, FatalError, Result}, + modify_reputation, +}; mod collation; mod metrics; @@ -792,11 +795,7 @@ async fn handle_incoming_peer_message( "AdvertiseCollation message is not expected on the collator side of the protocol", ); - ctx.send_message(NetworkBridgeTxMessage::ReportPeer( - origin.clone(), - COST_UNEXPECTED_MESSAGE, - )) - .await; + modify_reputation(ctx.sender(), origin, COST_UNEXPECTED_MESSAGE).await; // If we are advertised to, this is another collator, and we should disconnect. 
ctx.send_message(NetworkBridgeTxMessage::DisconnectPeer(origin, PeerSet::Collation)) @@ -874,17 +873,18 @@ async fn handle_incoming_request( return Ok(()) }, }; + let mode = per_relay_parent.prospective_parachains_mode; - let collation = match (per_relay_parent.prospective_parachains_mode, &req) { - (ProspectiveParachainsMode::Disabled, VersionedCollationRequest::V1(_)) => + let collation = match &req { + VersionedCollationRequest::V1(_) if !mode.is_enabled() => per_relay_parent.collations.values_mut().next(), - (ProspectiveParachainsMode::Enabled, VersionedCollationRequest::VStaging(req)) => + VersionedCollationRequest::VStaging(req) => per_relay_parent.collations.get_mut(&req.payload.candidate_hash), _ => { gum::warn!( target: LOG_TARGET, relay_parent = %relay_parent, - mode = ?per_relay_parent.prospective_parachains_mode, + prospective_parachains_mode = ?mode, ?peer_id, "Collation request version is invalid", ); @@ -917,8 +917,7 @@ async fn handle_incoming_request( target: LOG_TARGET, "Dropping incoming request as peer has a request in flight already." 
); - ctx.send_message(NetworkBridgeTxMessage::ReportPeer(peer_id, COST_APPARENT_FLOOD)) - .await; + modify_reputation(ctx.sender(), peer_id, COST_APPARENT_FLOOD).await; return Ok(()) } diff --git a/node/network/collator-protocol/src/validator_side/collation.rs b/node/network/collator-protocol/src/validator_side/collation.rs index 5c9f74abd86c..7ca64bca9adb 100644 --- a/node/network/collator-protocol/src/validator_side/collation.rs +++ b/node/network/collator-protocol/src/validator_side/collation.rs @@ -57,7 +57,7 @@ impl From<&CandidateReceipt> for FetchedCollation { } } -#[derive(Debug, Clone, Hash, Eq, PartialEq)] +#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)] pub struct PendingCollation { pub relay_parent: Hash, pub para_id: ParaId, diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index 70994551a595..9a218297ed20 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -26,6 +26,7 @@ use futures_timer::Delay; use std::{ collections::{hash_map::Entry, HashMap, HashSet}, convert::TryInto, + iter::FromIterator, task::Poll, time::{Duration, Instant}, }; @@ -258,11 +259,13 @@ impl PeerData { } match (relay_parent_mode, candidate_hash) { - (ProspectiveParachainsMode::Disabled, None) => { + (ProspectiveParachainsMode::Disabled, candidate_hash) => { if state.advertisements.contains_key(&on_relay_parent) { return Err(InsertAdvertisementError::Duplicate) } - state.advertisements.insert(on_relay_parent, HashSet::new()); + state + .advertisements + .insert(on_relay_parent, HashSet::from_iter(candidate_hash)); }, (ProspectiveParachainsMode::Enabled, Some(candidate_hash)) => { if state @@ -589,27 +592,15 @@ async fn fetch_collation( let candidate_hash = prospective_candidate.as_ref().map(ProspectiveCandidate::candidate_hash); if let Some(peer_data) = state.peer_data.get(&peer_id) { - // If candidate hash is `Some` 
then relay parent supports prospective - // parachains. if peer_data.has_advertised(&relay_parent, candidate_hash) { - let timeout = |collator_id, relay_parent| async move { - Delay::new(MAX_UNSHARED_DOWNLOAD_TIME).await; - (collator_id, relay_parent) - }; - state.collation_fetch_timeouts.push(timeout(id.clone(), relay_parent).boxed()); - request_collation( - sender, - state, - relay_parent, - para_id, - prospective_candidate, - peer_id, - id.clone(), - tx, - ) - .await; - - state.collation_fetches.push(rx.map(|r| ((id, pc), r)).boxed()); + if request_collation(sender, state, pc, id.clone(), peer_data.version, tx).await { + let timeout = |collator_id, relay_parent| async move { + Delay::new(MAX_UNSHARED_DOWNLOAD_TIME).await; + (collator_id, relay_parent) + }; + state.collation_fetch_timeouts.push(timeout(id.clone(), relay_parent).boxed()); + state.collation_fetches.push(rx.map(move |r| ((id, pc), r)).boxed()); + } } else { gum::debug!( target: LOG_TARGET, @@ -709,13 +700,24 @@ fn handle_peer_view_change(state: &mut State, peer_id: PeerId, view: View) { async fn request_collation( sender: &mut impl overseer::CollatorProtocolSenderTrait, state: &mut State, - relay_parent: Hash, - para_id: ParaId, - prospective_candidate: Option<ProspectiveCandidate>, - peer_id: PeerId, + pending_collation: PendingCollation, collator_id: CollatorId, + peer_protocol_version: CollationVersion, result: oneshot::Sender<(CandidateReceipt, PoV)>, -) { +) -> bool { + if state.requested_collations.contains_key(&pending_collation) { + gum::warn!( + target: LOG_TARGET, + peer_id = %pending_collation.peer_id, + %pending_collation.para_id, + ?pending_collation.relay_parent, + "collation has already been requested", + ); + return false + } + + let PendingCollation { relay_parent, para_id, peer_id, prospective_candidate, .. 
} = + pending_collation; let per_relay_parent = match state.per_relay_parent.get_mut(&relay_parent) { Some(state) => state, None => { @@ -726,25 +728,13 @@ async fn request_collation( relay_parent = %relay_parent, "Collation relay parent is out of view", ); - return + return false }, }; - let relay_parent_mode = per_relay_parent.prospective_parachains_mode; - let pending_collation = - PendingCollation::new(relay_parent, para_id, &peer_id, prospective_candidate); - if state.requested_collations.contains_key(&pending_collation) { - gum::warn!( - target: LOG_TARGET, - peer_id = %pending_collation.peer_id, - %pending_collation.para_id, - ?pending_collation.relay_parent, - "collation has already been requested", - ); - return - } - let (requests, response_recv) = match (relay_parent_mode, prospective_candidate) { - (ProspectiveParachainsMode::Disabled, None) => { + // Relay parent mode is checked in `handle_advertisement`. + let (requests, response_recv) = match (peer_protocol_version, prospective_candidate) { + (CollationVersion::V1, _) => { let (req, response_recv) = OutgoingRequest::new( Recipient::Peer(peer_id), request_v1::CollationFetchingRequest { relay_parent, para_id }, @@ -752,7 +742,7 @@ async fn request_collation( let requests = Requests::CollationFetchingV1(req); (requests, response_recv.boxed()) }, - (ProspectiveParachainsMode::Enabled, Some(ProspectiveCandidate { candidate_hash, .. })) => { + (CollationVersion::VStaging, Some(ProspectiveCandidate { candidate_hash, .. 
})) => { let (req, response_recv) = OutgoingRequest::new( Recipient::Peer(peer_id), request_vstaging::CollationFetchingRequest { @@ -765,14 +755,15 @@ async fn request_collation( (requests, response_recv.boxed()) }, _ => { - gum::error!( + gum::warn!( target: LOG_TARGET, peer_id = %peer_id, %para_id, ?relay_parent, - "Invalid arguments for collation request", + ?peer_protocol_version, + "Peer's protocol doesn't match the advertisement", ); - return + return false }, }; @@ -786,10 +777,7 @@ async fn request_collation( _lifetime_timer: state.metrics.time_collation_request_duration(), }; - state.requested_collations.insert( - PendingCollation::new(relay_parent, para_id, &peer_id, prospective_candidate), - per_request, - ); + state.requested_collations.insert(pending_collation, per_request); gum::debug!( target: LOG_TARGET, @@ -808,6 +796,7 @@ async fn request_collation( IfDisconnected::ImmediateError, )) .await; + true } /// Networking message has been received. @@ -1104,12 +1093,12 @@ where collations.waiting_queue.push_back((pending_collation, id)); }, CollationStatus::Waiting => { - fetch_collation(sender, state, pending_collation.clone(), id).await; + fetch_collation(sender, state, pending_collation, id).await; }, CollationStatus::Seconded if relay_parent_mode.is_enabled() => { // Limit is not reached, it's allowed to second another // collation. 
- fetch_collation(sender, state, pending_collation.clone(), id).await; + fetch_collation(sender, state, pending_collation, id).await; }, CollationStatus::Seconded => { gum::trace!( diff --git a/node/network/collator-protocol/src/validator_side/tests.rs b/node/network/collator-protocol/src/validator_side/tests/mod.rs similarity index 84% rename from node/network/collator-protocol/src/validator_side/tests.rs rename to node/network/collator-protocol/src/validator_side/tests/mod.rs index 986f53640078..6817cbda2f0a 100644 --- a/node/network/collator-protocol/src/validator_side/tests.rs +++ b/node/network/collator-protocol/src/validator_side/tests/mod.rs @@ -40,6 +40,8 @@ use polkadot_primitives_test_helpers::{ dummy_candidate_descriptor, dummy_candidate_receipt_bad_sig, dummy_hash, }; +mod prospective_parachains; + const ACTIVITY_TIMEOUT: Duration = Duration::from_millis(500); const DECLARE_TIMEOUT: Duration = Duration::from_millis(25); @@ -247,11 +249,8 @@ async fn assert_candidate_backing_second( expected_relay_parent: Hash, expected_para_id: ParaId, expected_pov: &PoV, + mode: ProspectiveParachainsMode, ) -> CandidateReceipt { - // TODO [https://github.com/paritytech/polkadot/issues/5054] - // - // While collator protocol isn't updated, it's expected to receive - // a Runtime API request for persisted validation data. 
let pvd = PersistedValidationData { parent_head: HeadData(vec![7, 8, 9]), relay_parent_number: 5, @@ -259,18 +258,32 @@ async fn assert_candidate_backing_second( relay_parent_storage_root: Default::default(), }; - assert_matches!( - overseer_recv(virtual_overseer).await, - AllMessages::RuntimeApi(RuntimeApiMessage::Request( - hash, - RuntimeApiRequest::PersistedValidationData(para_id, assumption, tx), - )) => { - assert_eq!(expected_relay_parent, hash); - assert_eq!(expected_para_id, para_id); - assert_eq!(OccupiedCoreAssumption::Free, assumption); - tx.send(Ok(Some(pvd.clone()))).unwrap(); - } - ); + // Depending on relay parent mode pvd will be either requested + // from the Runtime API or Prospective Parachains. + match mode { + ProspectiveParachainsMode::Disabled => assert_matches!( + overseer_recv(virtual_overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + hash, + RuntimeApiRequest::PersistedValidationData(para_id, assumption, tx), + )) => { + assert_eq!(expected_relay_parent, hash); + assert_eq!(expected_para_id, para_id); + assert_eq!(OccupiedCoreAssumption::Free, assumption); + tx.send(Ok(Some(pvd.clone()))).unwrap(); + } + ), + ProspectiveParachainsMode::Enabled => assert_matches!( + overseer_recv(virtual_overseer).await, + AllMessages::ProspectiveParachains( + ProspectiveParachainsMessage::GetProspectiveValidationData(request, tx), + ) => { + assert_eq!(expected_relay_parent, request.candidate_relay_parent); + assert_eq!(expected_para_id, request.para_id); + tx.send(Some(pvd.clone())).unwrap(); + } + ), + } assert_matches!( overseer_recv(virtual_overseer).await, @@ -308,6 +321,7 @@ async fn assert_fetch_collation_request( virtual_overseer: &mut VirtualOverseer, relay_parent: Hash, para_id: ParaId, + candidate_hash: Option<CandidateHash>, ) -> ResponseSender { assert_matches!( overseer_recv(virtual_overseer).await, @@ -315,14 +329,26 @@ async fn assert_fetch_collation_request( ) => { let req = reqs.into_iter().next() .expect("There should be exactly 
one request"); - match req { - Requests::CollationFetchingV1(req) => { - let payload = req.payload; - assert_eq!(payload.relay_parent, relay_parent); - assert_eq!(payload.para_id, para_id); - req.pending_response - } - _ => panic!("Unexpected request"), + match candidate_hash { + None => assert_matches!( + req, + Requests::CollationFetchingV1(req) => { + let payload = req.payload; + assert_eq!(payload.relay_parent, relay_parent); + assert_eq!(payload.para_id, para_id); + req.pending_response + } + ), + Some(candidate_hash) => assert_matches!( + req, + Requests::CollationFetchingVStaging(req) => { + let payload = req.payload; + assert_eq!(payload.relay_parent, relay_parent); + assert_eq!(payload.para_id, para_id); + assert_eq!(payload.candidate_hash, candidate_hash); + req.pending_response + } + ), } }) } @@ -333,27 +359,38 @@ async fn connect_and_declare_collator( peer: PeerId, collator: CollatorPair, para_id: ParaId, + version: CollationVersion, ) { overseer_send( virtual_overseer, CollatorProtocolMessage::NetworkBridgeUpdate(NetworkBridgeEvent::PeerConnected( peer.clone(), ObservedRole::Full, - CollationVersion::V1.into(), + version.into(), None, )), ) .await; - overseer_send( - virtual_overseer, - CollatorProtocolMessage::NetworkBridgeUpdate(NetworkBridgeEvent::PeerMessage( - peer.clone(), - Versioned::V1(protocol_v1::CollatorProtocolMessage::Declare( + let wire_message = match version { + CollationVersion::V1 => Versioned::V1(protocol_v1::CollatorProtocolMessage::Declare( + collator.public(), + para_id, + collator.sign(&protocol_v1::declare_signature_payload(&peer)), + )), + CollationVersion::VStaging => + Versioned::VStaging(protocol_vstaging::CollatorProtocolMessage::Declare( collator.public(), para_id, collator.sign(&protocol_v1::declare_signature_payload(&peer)), )), + }; + + overseer_send( + virtual_overseer, + CollatorProtocolMessage::NetworkBridgeUpdate(NetworkBridgeEvent::PeerMessage( + peer, + wire_message, )), ) .await; @@ -364,12 +401,23 @@ async fn 
advertise_collation( virtual_overseer: &mut VirtualOverseer, peer: PeerId, relay_parent: Hash, + candidate: Option<(CandidateHash, Hash)>, // Candidate hash + parent head data hash. ) { + let wire_message = match candidate { + Some((candidate_hash, parent_head_data_hash)) => + Versioned::VStaging(protocol_vstaging::CollatorProtocolMessage::AdvertiseCollation { + relay_parent, + candidate_hash, + parent_head_data_hash, + }), + None => + Versioned::V1(protocol_v1::CollatorProtocolMessage::AdvertiseCollation(relay_parent)), + }; overseer_send( virtual_overseer, CollatorProtocolMessage::NetworkBridgeUpdate(NetworkBridgeEvent::PeerMessage( peer, - Versioned::V1(protocol_v1::CollatorProtocolMessage::AdvertiseCollation(relay_parent)), + wire_message, )), ) .await; @@ -417,15 +465,75 @@ fn act_on_advertisement() { peer_b.clone(), pair.clone(), test_state.chain_ids[0], + CollationVersion::V1, + ) + .await; + + advertise_collation(&mut virtual_overseer, peer_b.clone(), test_state.relay_parent, None) + .await; + + assert_fetch_collation_request( + &mut virtual_overseer, + test_state.relay_parent, + test_state.chain_ids[0], + None, + ) + .await; + + virtual_overseer + }); +} + +/// Tests that validator side works with vstaging network protocol +/// before async backing is enabled. 
+#[test] +fn act_on_advertisement_vstaging() { + let test_state = TestState::default(); + + test_harness(|test_harness| async move { + let TestHarness { mut virtual_overseer } = test_harness; + + let pair = CollatorPair::generate().0; + gum::trace!("activating"); + + overseer_send( + &mut virtual_overseer, + CollatorProtocolMessage::NetworkBridgeUpdate(NetworkBridgeEvent::OurViewChange( + our_view![test_state.relay_parent], + )), + ) + .await; + + assert_runtime_version_request(&mut virtual_overseer, test_state.relay_parent).await; + respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; + + let peer_b = PeerId::random(); + + connect_and_declare_collator( + &mut virtual_overseer, + peer_b.clone(), + pair.clone(), + test_state.chain_ids[0], + CollationVersion::VStaging, ) .await; - advertise_collation(&mut virtual_overseer, peer_b.clone(), test_state.relay_parent).await; + let candidate_hash = CandidateHash::default(); + let parent_head_data_hash = Hash::zero(); + // vstaging advertisement. 
+ advertise_collation( + &mut virtual_overseer, + peer_b.clone(), + test_state.relay_parent, + Some((candidate_hash, parent_head_data_hash)), + ) + .await; assert_fetch_collation_request( &mut virtual_overseer, test_state.relay_parent, test_state.chain_ids[0], + Some(candidate_hash), ) .await; @@ -461,6 +569,7 @@ fn collator_reporting_works() { peer_b.clone(), test_state.collators[0].clone(), test_state.chain_ids[0].clone(), + CollationVersion::V1, ) .await; @@ -469,6 +578,7 @@ fn collator_reporting_works() { peer_c.clone(), test_state.collators[1].clone(), test_state.chain_ids[0].clone(), + CollationVersion::V1, ) .await; @@ -583,6 +693,7 @@ fn fetch_collations_works() { peer_b.clone(), test_state.collators[0].clone(), test_state.chain_ids[0].clone(), + CollationVersion::V1, ) .await; @@ -591,16 +702,20 @@ fn fetch_collations_works() { peer_c.clone(), test_state.collators[1].clone(), test_state.chain_ids[0].clone(), + CollationVersion::V1, ) .await; - advertise_collation(&mut virtual_overseer, peer_b.clone(), test_state.relay_parent).await; - advertise_collation(&mut virtual_overseer, peer_c.clone(), test_state.relay_parent).await; + advertise_collation(&mut virtual_overseer, peer_b.clone(), test_state.relay_parent, None) + .await; + advertise_collation(&mut virtual_overseer, peer_c.clone(), test_state.relay_parent, None) + .await; let response_channel = assert_fetch_collation_request( &mut virtual_overseer, test_state.relay_parent, test_state.chain_ids[0], + None, ) .await; @@ -627,6 +742,7 @@ fn fetch_collations_works() { test_state.relay_parent, test_state.chain_ids[0], &pov, + ProspectiveParachainsMode::Disabled, ) .await; @@ -655,6 +771,7 @@ fn fetch_collations_works() { peer_b.clone(), test_state.collators[2].clone(), test_state.chain_ids[0].clone(), + CollationVersion::V1, ) .await; @@ -663,6 +780,7 @@ fn fetch_collations_works() { peer_c.clone(), test_state.collators[3].clone(), test_state.chain_ids[0].clone(), + CollationVersion::V1, ) .await; @@ -671,27 
+789,41 @@ fn fetch_collations_works() { peer_d.clone(), test_state.collators[4].clone(), test_state.chain_ids[0].clone(), + CollationVersion::V1, ) .await; - advertise_collation(&mut virtual_overseer, peer_b.clone(), second).await; - advertise_collation(&mut virtual_overseer, peer_c.clone(), second).await; - advertise_collation(&mut virtual_overseer, peer_d.clone(), second).await; + advertise_collation(&mut virtual_overseer, peer_b.clone(), second, None).await; + advertise_collation(&mut virtual_overseer, peer_c.clone(), second, None).await; + advertise_collation(&mut virtual_overseer, peer_d.clone(), second, None).await; // Dropping the response channel should lead to fetching the second collation. - assert_fetch_collation_request(&mut virtual_overseer, second, test_state.chain_ids[0]) - .await; + assert_fetch_collation_request( + &mut virtual_overseer, + second, + test_state.chain_ids[0], + None, + ) + .await; - let response_channel_non_exclusive = - assert_fetch_collation_request(&mut virtual_overseer, second, test_state.chain_ids[0]) - .await; + let response_channel_non_exclusive = assert_fetch_collation_request( + &mut virtual_overseer, + second, + test_state.chain_ids[0], + None, + ) + .await; // Third collator should receive response after that timeout: Delay::new(MAX_UNSHARED_DOWNLOAD_TIME + Duration::from_millis(50)).await; - let response_channel = - assert_fetch_collation_request(&mut virtual_overseer, second, test_state.chain_ids[0]) - .await; + let response_channel = assert_fetch_collation_request( + &mut virtual_overseer, + second, + test_state.chain_ids[0], + None, + ) + .await; let pov = PoV { block_data: BlockData(vec![1]) }; let mut candidate_a = @@ -721,6 +853,7 @@ fn fetch_collations_works() { second, test_state.chain_ids[0], &pov, + ProspectiveParachainsMode::Disabled, ) .await; @@ -753,6 +886,7 @@ fn reject_connection_to_next_group() { peer_b.clone(), test_state.collators[0].clone(), test_state.chain_ids[1].clone(), // next, not current 
`para_id` + CollationVersion::V1, ) .await; @@ -806,6 +940,7 @@ fn fetch_next_collation_on_invalid_collation() { peer_b.clone(), test_state.collators[0].clone(), test_state.chain_ids[0].clone(), + CollationVersion::V1, ) .await; @@ -814,16 +949,20 @@ fn fetch_next_collation_on_invalid_collation() { peer_c.clone(), test_state.collators[1].clone(), test_state.chain_ids[0].clone(), + CollationVersion::V1, ) .await; - advertise_collation(&mut virtual_overseer, peer_b.clone(), test_state.relay_parent).await; - advertise_collation(&mut virtual_overseer, peer_c.clone(), test_state.relay_parent).await; + advertise_collation(&mut virtual_overseer, peer_b.clone(), test_state.relay_parent, None) + .await; + advertise_collation(&mut virtual_overseer, peer_c.clone(), test_state.relay_parent, None) + .await; let response_channel = assert_fetch_collation_request( &mut virtual_overseer, test_state.relay_parent, test_state.chain_ids[0], + None, ) .await; @@ -845,6 +984,7 @@ fn fetch_next_collation_on_invalid_collation() { test_state.relay_parent, test_state.chain_ids[0], &pov, + ProspectiveParachainsMode::Disabled, ) .await; @@ -871,6 +1011,7 @@ fn fetch_next_collation_on_invalid_collation() { &mut virtual_overseer, test_state.relay_parent, test_state.chain_ids[0], + None, ) .await; @@ -907,14 +1048,17 @@ fn inactive_disconnected() { peer_b.clone(), pair.clone(), test_state.chain_ids[0], + CollationVersion::V1, ) .await; - advertise_collation(&mut virtual_overseer, peer_b.clone(), test_state.relay_parent).await; + advertise_collation(&mut virtual_overseer, peer_b.clone(), test_state.relay_parent, None) + .await; assert_fetch_collation_request( &mut virtual_overseer, test_state.relay_parent, test_state.chain_ids[0], + None, ) .await; @@ -960,29 +1104,45 @@ fn activity_extends_life() { peer_b.clone(), pair.clone(), test_state.chain_ids[0], + CollationVersion::V1, ) .await; Delay::new(ACTIVITY_TIMEOUT * 2 / 3).await; - advertise_collation(&mut virtual_overseer, peer_b.clone(), 
hash_a).await; + advertise_collation(&mut virtual_overseer, peer_b.clone(), hash_a, None).await; - assert_fetch_collation_request(&mut virtual_overseer, hash_a, test_state.chain_ids[0]) - .await; + assert_fetch_collation_request( + &mut virtual_overseer, + hash_a, + test_state.chain_ids[0], + None, + ) + .await; Delay::new(ACTIVITY_TIMEOUT * 2 / 3).await; - advertise_collation(&mut virtual_overseer, peer_b.clone(), hash_b).await; + advertise_collation(&mut virtual_overseer, peer_b.clone(), hash_b, None).await; - assert_fetch_collation_request(&mut virtual_overseer, hash_b, test_state.chain_ids[0]) - .await; + assert_fetch_collation_request( + &mut virtual_overseer, + hash_b, + test_state.chain_ids[0], + None, + ) + .await; Delay::new(ACTIVITY_TIMEOUT * 2 / 3).await; - advertise_collation(&mut virtual_overseer, peer_b.clone(), hash_c).await; + advertise_collation(&mut virtual_overseer, peer_b.clone(), hash_c, None).await; - assert_fetch_collation_request(&mut virtual_overseer, hash_c, test_state.chain_ids[0]) - .await; + assert_fetch_collation_request( + &mut virtual_overseer, + hash_c, + test_state.chain_ids[0], + None, + ) + .await; Delay::new(ACTIVITY_TIMEOUT * 3 / 2).await; @@ -1119,6 +1279,7 @@ fn view_change_clears_old_collators() { peer_b.clone(), pair.clone(), test_state.chain_ids[0], + CollationVersion::V1, ) .await; diff --git a/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs b/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs new file mode 100644 index 000000000000..30b5fd4bd320 --- /dev/null +++ b/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs @@ -0,0 +1,164 @@ +// Copyright 2022 Parity Technologies (UK) Ltd. +// This file is part of Polkadot. 
+ +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see <http://www.gnu.org/licenses/>. + +//! Tests for the validator side with enabled prospective parachains. + +use super::*; + +use polkadot_node_subsystem::messages::ChainApiMessage; +use polkadot_primitives::v2::Header; + +const API_VERSION_PROSPECTIVE_ENABLED: u32 = 3; + +const ALLOWED_ANCESTRY: u32 = 3; + +fn get_parent_hash(hash: Hash) -> Hash { + Hash::from_low_u64_be(hash.to_low_u64_be() + 1) +} + +/// Handle a view update. +async fn update_view( + virtual_overseer: &mut VirtualOverseer, + test_state: &TestState, + new_view: Vec<(Hash, u32)>, // Hash and block number. + activated: u8, // How many new heads does this update contain? 
+) { + let new_view: HashMap<Hash, u32> = HashMap::from_iter(new_view); + + let our_view = + OurView::new(new_view.keys().map(|hash| (*hash, Arc::new(jaeger::Span::Disabled))), 0); + + overseer_send( + virtual_overseer, + CollatorProtocolMessage::NetworkBridgeUpdate(NetworkBridgeEvent::OurViewChange(our_view)), + ) + .await; + + let mut next_overseer_message = None; + for _ in 0..activated { + let (leaf_hash, leaf_number) = assert_matches!( + overseer_recv(virtual_overseer).await, + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + parent, + RuntimeApiRequest::Version(tx), + )) => { + tx.send(Ok(API_VERSION_PROSPECTIVE_ENABLED)).unwrap(); + (parent, new_view.get(&parent).copied().expect("Unknown parent requested")) + } + ); + + let min_number = leaf_number.saturating_sub(ALLOWED_ANCESTRY); + + assert_matches!( + overseer_recv(virtual_overseer).await, + AllMessages::ProspectiveParachains( + ProspectiveParachainsMessage::GetMinimumRelayParents(parent, tx), + ) if parent == leaf_hash => { + tx.send(test_state.chain_ids.iter().map(|para_id| (*para_id, min_number)).collect()).unwrap(); + } + ); + + let ancestry_len = leaf_number + 1 - min_number; + let ancestry_hashes = std::iter::successors(Some(leaf_hash), |h| Some(get_parent_hash(*h))) + .take(ancestry_len as usize); + let ancestry_numbers = (min_number..=leaf_number).rev(); + let ancestry_iter = ancestry_hashes.clone().zip(ancestry_numbers).peekable(); + + // How many blocks were actually requested. + let mut requested_len = 0; + { + let mut ancestry_iter = ancestry_iter.clone(); + loop { + let (hash, number) = match ancestry_iter.next() { + Some((hash, number)) => (hash, number), + None => break, + }; + + // May be `None` for the last element. 
+ let parent_hash = + ancestry_iter.peek().map(|(h, _)| *h).unwrap_or_else(|| get_parent_hash(hash)); + + let msg = match next_overseer_message.take() { + Some(msg) => msg, + None => overseer_recv(virtual_overseer).await, + }; + + if !matches!( + &msg, + AllMessages::ChainApi(ChainApiMessage::BlockHeader(_hash, ..)) + if *_hash == hash + ) { + // Ancestry has already been cached for this leaf. + next_overseer_message.replace(msg); + break + } + + assert_matches!( + msg, + AllMessages::ChainApi(ChainApiMessage::BlockHeader(.., tx)) => { + let header = Header { + parent_hash, + number, + state_root: Hash::zero(), + extrinsics_root: Hash::zero(), + digest: Default::default(), + }; + + tx.send(Ok(Some(header))).unwrap(); + } + ); + + requested_len += 1; + } + } + + for (hash, number) in ancestry_iter.take(requested_len) { + let msg = match next_overseer_message.take() { + Some(msg) => msg, + None => virtual_overseer.recv().await, + }; + assert_matches!( + msg, + AllMessages::RuntimeApi( + RuntimeApiMessage::Request(parent, RuntimeApiRequest::Validators(tx)) + ) if parent == hash => { + tx.send(Ok(test_state.validator_public.clone())).unwrap(); + } + ); + + assert_matches!( + virtual_overseer.recv().await, + AllMessages::RuntimeApi( + RuntimeApiMessage::Request(parent, RuntimeApiRequest::ValidatorGroups(tx)) + ) if parent == hash => { + let validator_groups = test_state.validator_groups.clone(); + let mut group_rotation_info = test_state.group_rotation_info.clone(); + group_rotation_info.now = number; + tx.send(Ok((validator_groups, group_rotation_info))).unwrap(); + } + ); + + assert_matches!( + virtual_overseer.recv().await, + AllMessages::RuntimeApi( + RuntimeApiMessage::Request(parent, RuntimeApiRequest::AvailabilityCores(tx)) + ) if parent == hash => { + tx.send(Ok(test_state.cores.clone())).unwrap(); + } + ); + } + } +} From 0b0c70a3f7b8839a06080d141c680e4104403eb1 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Fri, 16 Sep 2022 01:01:54 +0300 Subject: [PATCH 
30/45] Validator side tests --- .../src/validator_side/collation.rs | 2 +- .../src/validator_side/mod.rs | 2 - .../src/validator_side/tests/mod.rs | 35 +- .../tests/prospective_parachains.rs | 408 ++++++++++++++++-- .../protocol/src/request_response/vstaging.rs | 5 +- 5 files changed, 391 insertions(+), 61 deletions(-) diff --git a/node/network/collator-protocol/src/validator_side/collation.rs b/node/network/collator-protocol/src/validator_side/collation.rs index 7ca64bca9adb..5516df913556 100644 --- a/node/network/collator-protocol/src/validator_side/collation.rs +++ b/node/network/collator-protocol/src/validator_side/collation.rs @@ -144,7 +144,7 @@ impl Collations { self.seconded_count += 1 } - /// Returns the next collation to fetch from the `unfetched_collations`. + /// Returns the next collation to fetch from the `waiting_queue`. /// /// This will reset the status back to `Waiting` using [`CollationStatus::back_to_waiting`]. /// diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index 9a218297ed20..9952f1564136 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -667,8 +667,6 @@ async fn notify_collation_seconded( sender .send_message(NetworkBridgeTxMessage::SendCollationMessage(vec![peer_id], wire_message)) .await; - - modify_reputation(sender, peer_id, BENEFIT_NOTIFY_GOOD).await; } /// A peer's view has changed. 
A number of things should be done: diff --git a/node/network/collator-protocol/src/validator_side/tests/mod.rs b/node/network/collator-protocol/src/validator_side/tests/mod.rs index 6817cbda2f0a..47b6617cf5e8 100644 --- a/node/network/collator-protocol/src/validator_side/tests/mod.rs +++ b/node/network/collator-protocol/src/validator_side/tests/mod.rs @@ -121,6 +121,7 @@ type VirtualOverseer = test_helpers::TestSubsystemContextHandle>(test: impl FnOnce(TestHarness) -> T) { @@ -142,9 +143,10 @@ fn test_harness>(test: impl FnOnce(TestHarne ) .unwrap(); + let keystore: SyncCryptoStorePtr = Arc::new(keystore); let subsystem = run( context, - Arc::new(keystore), + keystore.clone(), crate::CollatorEvictionPolicy { inactive_collator: ACTIVITY_TIMEOUT, undeclared: DECLARE_TIMEOUT, @@ -152,7 +154,7 @@ fn test_harness>(test: impl FnOnce(TestHarne Metrics::default(), ); - let test_fut = test(TestHarness { virtual_overseer }); + let test_fut = test(TestHarness { virtual_overseer, keystore }); futures::pin_mut!(test_fut); futures::pin_mut!(subsystem); @@ -260,9 +262,10 @@ async fn assert_candidate_backing_second( // Depending on relay parent mode pvd will be either requested // from the Runtime API or Prospective Parachains. 
+ let msg = overseer_recv(virtual_overseer).await; match mode { ProspectiveParachainsMode::Disabled => assert_matches!( - overseer_recv(virtual_overseer).await, + msg, AllMessages::RuntimeApi(RuntimeApiMessage::Request( hash, RuntimeApiRequest::PersistedValidationData(para_id, assumption, tx), @@ -274,7 +277,7 @@ async fn assert_candidate_backing_second( } ), ProspectiveParachainsMode::Enabled => assert_matches!( - overseer_recv(virtual_overseer).await, + msg, AllMessages::ProspectiveParachains( ProspectiveParachainsMessage::GetProspectiveValidationData(request, tx), ) => { @@ -442,7 +445,7 @@ fn act_on_advertisement() { let test_state = TestState::default(); test_harness(|test_harness| async move { - let TestHarness { mut virtual_overseer } = test_harness; + let TestHarness { mut virtual_overseer, .. } = test_harness; let pair = CollatorPair::generate().0; gum::trace!("activating"); @@ -491,7 +494,7 @@ fn act_on_advertisement_vstaging() { let test_state = TestState::default(); test_harness(|test_harness| async move { - let TestHarness { mut virtual_overseer } = test_harness; + let TestHarness { mut virtual_overseer, .. } = test_harness; let pair = CollatorPair::generate().0; gum::trace!("activating"); @@ -547,7 +550,7 @@ fn collator_reporting_works() { let test_state = TestState::default(); test_harness(|test_harness| async move { - let TestHarness { mut virtual_overseer } = test_harness; + let TestHarness { mut virtual_overseer, .. } = test_harness; overseer_send( &mut virtual_overseer, @@ -608,7 +611,7 @@ fn collator_authentication_verification_works() { let test_state = TestState::default(); test_harness(|test_harness| async move { - let TestHarness { mut virtual_overseer } = test_harness; + let TestHarness { mut virtual_overseer, .. 
} = test_harness; let peer_b = PeerId::random(); @@ -665,7 +668,7 @@ fn fetch_collations_works() { let test_state = TestState::default(); test_harness(|test_harness| async move { - let TestHarness { mut virtual_overseer } = test_harness; + let TestHarness { mut virtual_overseer, .. } = test_harness; let second = Hash::random(); @@ -866,7 +869,7 @@ fn reject_connection_to_next_group() { let test_state = TestState::default(); test_harness(|test_harness| async move { - let TestHarness { mut virtual_overseer } = test_harness; + let TestHarness { mut virtual_overseer, .. } = test_harness; overseer_send( &mut virtual_overseer, @@ -913,7 +916,7 @@ fn fetch_next_collation_on_invalid_collation() { let test_state = TestState::default(); test_harness(|test_harness| async move { - let TestHarness { mut virtual_overseer } = test_harness; + let TestHarness { mut virtual_overseer, .. } = test_harness; let second = Hash::random(); @@ -1024,7 +1027,7 @@ fn inactive_disconnected() { let test_state = TestState::default(); test_harness(|test_harness| async move { - let TestHarness { mut virtual_overseer } = test_harness; + let TestHarness { mut virtual_overseer, .. } = test_harness; let pair = CollatorPair::generate().0; @@ -1074,7 +1077,7 @@ fn activity_extends_life() { let test_state = TestState::default(); test_harness(|test_harness| async move { - let TestHarness { mut virtual_overseer } = test_harness; + let TestHarness { mut virtual_overseer, .. } = test_harness; let pair = CollatorPair::generate().0; @@ -1157,7 +1160,7 @@ fn disconnect_if_no_declare() { let test_state = TestState::default(); test_harness(|test_harness| async move { - let TestHarness { mut virtual_overseer } = test_harness; + let TestHarness { mut virtual_overseer, .. 
} = test_harness; overseer_send( &mut virtual_overseer, @@ -1194,7 +1197,7 @@ fn disconnect_if_wrong_declare() { let test_state = TestState::default(); test_harness(|test_harness| async move { - let TestHarness { mut virtual_overseer } = test_harness; + let TestHarness { mut virtual_overseer, .. } = test_harness; let pair = CollatorPair::generate().0; @@ -1257,7 +1260,7 @@ fn view_change_clears_old_collators() { let mut test_state = TestState::default(); test_harness(|test_harness| async move { - let TestHarness { mut virtual_overseer } = test_harness; + let TestHarness { mut virtual_overseer, .. } = test_harness; let pair = CollatorPair::generate().0; diff --git a/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs b/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs index 30b5fd4bd320..5107a5df70a6 100644 --- a/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs +++ b/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs @@ -19,7 +19,10 @@ use super::*; use polkadot_node_subsystem::messages::ChainApiMessage; -use polkadot_primitives::v2::Header; +use polkadot_primitives::v2::{ + BlockNumber, CandidateCommitments, CommittedCandidateReceipt, Header, SigningContext, + ValidatorId, +}; const API_VERSION_PROSPECTIVE_ENABLED: u32 = 3; @@ -29,6 +32,48 @@ fn get_parent_hash(hash: Hash) -> Hash { Hash::from_low_u64_be(hash.to_low_u64_be() + 1) } +async fn assert_assign_incoming( + virtual_overseer: &mut VirtualOverseer, + test_state: &TestState, + hash: Hash, + number: BlockNumber, + next_msg: &mut Option<AllMessages>, +) { + let msg = match next_msg.take() { + Some(msg) => msg, + None => overseer_recv(virtual_overseer).await, + }; + assert_matches!( + msg, + AllMessages::RuntimeApi( + RuntimeApiMessage::Request(parent, RuntimeApiRequest::Validators(tx)) + ) if parent == hash => { + tx.send(Ok(test_state.validator_public.clone())).unwrap(); + } + ); + + 
assert_matches!( + overseer_recv(virtual_overseer).await, + AllMessages::RuntimeApi( + RuntimeApiMessage::Request(parent, RuntimeApiRequest::ValidatorGroups(tx)) + ) if parent == hash => { + let validator_groups = test_state.validator_groups.clone(); + let mut group_rotation_info = test_state.group_rotation_info.clone(); + group_rotation_info.now = number; + tx.send(Ok((validator_groups, group_rotation_info))).unwrap(); + } + ); + + assert_matches!( + overseer_recv(virtual_overseer).await, + AllMessages::RuntimeApi( + RuntimeApiMessage::Request(parent, RuntimeApiRequest::AvailabilityCores(tx)) + ) if parent == hash => { + tx.send(Ok(test_state.cores.clone())).unwrap(); + } + ); +} + /// Handle a view update. async fn update_view( virtual_overseer: &mut VirtualOverseer, @@ -60,6 +105,15 @@ async fn update_view( } ); + assert_assign_incoming( + virtual_overseer, + test_state, + leaf_hash, + leaf_number, + &mut next_overseer_message, + ) + .await; + let min_number = leaf_number.saturating_sub(ALLOWED_ANCESTRY); assert_matches!( @@ -78,7 +132,7 @@ async fn update_view( let ancestry_iter = ancestry_hashes.clone().zip(ancestry_numbers).peekable(); // How many blocks were actually requested. - let mut requested_len = 0; + let mut requested_len: usize = 0; { let mut ancestry_iter = ancestry_iter.clone(); loop { @@ -96,11 +150,7 @@ async fn update_view( None => overseer_recv(virtual_overseer).await, }; - if !matches!( - &msg, - AllMessages::ChainApi(ChainApiMessage::BlockHeader(_hash, ..)) - if *_hash == hash - ) { + if !matches!(&msg, AllMessages::ChainApi(ChainApiMessage::BlockHeader(..))) { // Ancestry has already been cached for this leaf. 
next_overseer_message.replace(msg); break @@ -125,40 +175,316 @@ async fn update_view( } } - for (hash, number) in ancestry_iter.take(requested_len) { - let msg = match next_overseer_message.take() { - Some(msg) => msg, - None => virtual_overseer.recv().await, - }; - assert_matches!( - msg, - AllMessages::RuntimeApi( - RuntimeApiMessage::Request(parent, RuntimeApiRequest::Validators(tx)) - ) if parent == hash => { - tx.send(Ok(test_state.validator_public.clone())).unwrap(); - } - ); - - assert_matches!( - virtual_overseer.recv().await, - AllMessages::RuntimeApi( - RuntimeApiMessage::Request(parent, RuntimeApiRequest::ValidatorGroups(tx)) - ) if parent == hash => { - let validator_groups = test_state.validator_groups.clone(); - let mut group_rotation_info = test_state.group_rotation_info.clone(); - group_rotation_info.now = number; - tx.send(Ok((validator_groups, group_rotation_info))).unwrap(); - } - ); - - assert_matches!( - virtual_overseer.recv().await, - AllMessages::RuntimeApi( - RuntimeApiMessage::Request(parent, RuntimeApiRequest::AvailabilityCores(tx)) - ) if parent == hash => { - tx.send(Ok(test_state.cores.clone())).unwrap(); - } - ); + // Skip the leaf. 
+ for (hash, number) in ancestry_iter.skip(1).take(requested_len.saturating_sub(1)) { + assert_assign_incoming( + virtual_overseer, + test_state, + hash, + number, + &mut next_overseer_message, + ) + .await; } } } + +async fn send_seconded_statement( + virtual_overseer: &mut VirtualOverseer, + keystore: SyncCryptoStorePtr, + candidate: &CommittedCandidateReceipt, +) { + let signing_context = SigningContext { session_index: 0, parent_hash: Hash::zero() }; + let stmt = SignedFullStatement::sign( + &keystore, + Statement::Seconded(candidate.clone()), + &signing_context, + ValidatorIndex(0), + &ValidatorId::from(Sr25519Keyring::Alice.public()), + ) + .await + .ok() + .flatten() + .expect("should be signed"); + + overseer_send( + virtual_overseer, + CollatorProtocolMessage::Seconded(candidate.descriptor.relay_parent, stmt), + ) + .await; +} + +async fn assert_collation_seconded( + virtual_overseer: &mut VirtualOverseer, + relay_parent: Hash, + peer_id: PeerId, +) { + assert_matches!( + overseer_recv(virtual_overseer).await, + AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::ReportPeer( + peer, + rep, + )) => { + assert_eq!(peer_id, peer); + assert_eq!(rep, BENEFIT_NOTIFY_GOOD); + } + ); + assert_matches!( + overseer_recv(virtual_overseer).await, + AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::SendCollationMessage( + peers, + Versioned::VStaging(protocol_vstaging::CollationProtocol::CollatorProtocol( + protocol_vstaging::CollatorProtocolMessage::CollationSeconded( + _relay_parent, + .., + ), + )), + )) => { + assert_eq!(peers, vec![peer_id]); + assert_eq!(relay_parent, _relay_parent); + } + ); +} + +#[test] +fn v1_advertisement_rejected() { + let test_state = TestState::default(); + + test_harness(|test_harness| async move { + let TestHarness { mut virtual_overseer, .. 
} = test_harness; + + let pair_a = CollatorPair::generate().0; + + let head_b = Hash::from_low_u64_be(128); + let head_b_num: u32 = 0; + + update_view(&mut virtual_overseer, &test_state, vec![(head_b, head_b_num)], 1).await; + + let peer_a = PeerId::random(); + + // Accept both collators from the implicit view. + connect_and_declare_collator( + &mut virtual_overseer, + peer_a, + pair_a.clone(), + test_state.chain_ids[0], + CollationVersion::V1, + ) + .await; + + advertise_collation(&mut virtual_overseer, peer_a, head_b, None).await; + + // Not reported. + assert!(overseer_recv_with_timeout(&mut virtual_overseer, Duration::from_millis(50)) + .await + .is_none()); + + virtual_overseer + }); +} + +#[test] +fn accept_advertisements_from_implicit_view() { + let test_state = TestState::default(); + + test_harness(|test_harness| async move { + let TestHarness { mut virtual_overseer, .. } = test_harness; + + let pair_a = CollatorPair::generate().0; + let pair_b = CollatorPair::generate().0; + + let head_b = Hash::from_low_u64_be(128); + let head_b_num: u32 = 2; + + // Grandparent of head `b`. + // Group rotation frequency is 1 by default, at `c` we're assigned + // to the first para. + let head_c = Hash::from_low_u64_be(130); + + // Activated leaf is `b`, but the collation will be based on `c`. + update_view(&mut virtual_overseer, &test_state, vec![(head_b, head_b_num)], 1).await; + + let peer_a = PeerId::random(); + let peer_b = PeerId::random(); + + // Accept both collators from the implicit view. 
+ connect_and_declare_collator( + &mut virtual_overseer, + peer_a, + pair_a.clone(), + test_state.chain_ids[0], + CollationVersion::VStaging, + ) + .await; + connect_and_declare_collator( + &mut virtual_overseer, + peer_b, + pair_b.clone(), + test_state.chain_ids[1], + CollationVersion::VStaging, + ) + .await; + + let candidate_hash = CandidateHash::default(); + let parent_head_data_hash = Hash::zero(); + advertise_collation( + &mut virtual_overseer, + peer_a, + head_c, + Some((candidate_hash, parent_head_data_hash)), + ) + .await; + + assert_fetch_collation_request( + &mut virtual_overseer, + head_c, + test_state.chain_ids[0], + Some(candidate_hash), + ) + .await; + + virtual_overseer + }); +} + +#[test] +fn second_multiple_candidates_per_relay_parent() { + let test_state = TestState::default(); + + test_harness(|test_harness| async move { + let TestHarness { mut virtual_overseer, keystore } = test_harness; + + let pair = CollatorPair::generate().0; + + // Grandparent of head `a`. + let head_b = Hash::from_low_u64_be(128); + let head_b_num: u32 = 2; + + // Grandparent of head `b`. + // Group rotation frequency is 1 by default, at `c` we're assigned + // to the first para. + let head_c = Hash::from_low_u64_be(130); + + // Activated leaf is `b`, but the collation will be based on `c`. 
+ update_view(&mut virtual_overseer, &test_state, vec![(head_b, head_b_num)], 1).await; + + let peer_a = PeerId::random(); + + connect_and_declare_collator( + &mut virtual_overseer, + peer_a, + pair.clone(), + test_state.chain_ids[0], + CollationVersion::VStaging, + ) + .await; + + for i in 0..(MAX_CANDIDATE_DEPTH + 1) { + let candidate_hash = CandidateHash(Hash::repeat_byte(i as u8)); + let parent_head_data_hash = Hash::zero(); + + advertise_collation( + &mut virtual_overseer, + peer_a, + head_c, + Some((candidate_hash, parent_head_data_hash)), + ) + .await; + + let response_channel = assert_fetch_collation_request( + &mut virtual_overseer, + head_c, + test_state.chain_ids[0], + Some(candidate_hash), + ) + .await; + + let pov = PoV { block_data: BlockData(vec![]) }; + let mut candidate = dummy_candidate_receipt_bad_sig(head_c, Some(Default::default())); + candidate.descriptor.para_id = test_state.chain_ids[0]; + candidate.descriptor.relay_parent = head_c; + let commitments = CandidateCommitments { + head_data: HeadData(vec![1, 2, 3]), + horizontal_messages: Vec::new(), + upward_messages: Vec::new(), + new_validation_code: None, + processed_downward_messages: 0, + hrmp_watermark: 0, + }; + candidate.commitments_hash = commitments.hash(); + + response_channel + .send(Ok(request_vstaging::CollationFetchingResponse::Collation( + candidate.clone(), + pov.clone(), + ) + .encode())) + .expect("Sending response should succeed"); + + assert_candidate_backing_second( + &mut virtual_overseer, + head_c, + test_state.chain_ids[0], + &pov, + ProspectiveParachainsMode::Enabled, + ) + .await; + + let candidate = + CommittedCandidateReceipt { descriptor: candidate.descriptor, commitments }; + + send_seconded_statement(&mut virtual_overseer, keystore.clone(), &candidate).await; + + assert_collation_seconded(&mut virtual_overseer, head_c, peer_a).await; + } + + // No more advertisements can be made for this relay parent. 
+ let candidate_hash = CandidateHash(Hash::repeat_byte(0xAA)); + advertise_collation( + &mut virtual_overseer, + peer_a, + head_c, + Some((candidate_hash, Hash::zero())), + ) + .await; + + // Reported because reached the limit of advertisements per relay parent. + assert_matches!( + overseer_recv(&mut virtual_overseer).await, + AllMessages::NetworkBridgeTx( + NetworkBridgeTxMessage::ReportPeer(peer_id, rep), + ) => { + assert_eq!(peer_a, peer_id); + assert_eq!(rep, COST_UNEXPECTED_MESSAGE); + } + ); + + // By different peer too (not reported). + let pair_b = CollatorPair::generate().0; + let peer_b = PeerId::random(); + + connect_and_declare_collator( + &mut virtual_overseer, + peer_b, + pair_b.clone(), + test_state.chain_ids[0], + CollationVersion::VStaging, + ) + .await; + + let candidate_hash = CandidateHash(Hash::repeat_byte(0xFF)); + advertise_collation( + &mut virtual_overseer, + peer_b, + head_c, + Some((candidate_hash, Hash::zero())), + ) + .await; + + assert!(overseer_recv_with_timeout(&mut virtual_overseer, Duration::from_millis(50)) + .await + .is_none()); + + virtual_overseer + }); +} diff --git a/node/network/protocol/src/request_response/vstaging.rs b/node/network/protocol/src/request_response/vstaging.rs index 058bdccdcb03..0b8d223e3aee 100644 --- a/node/network/protocol/src/request_response/vstaging.rs +++ b/node/network/protocol/src/request_response/vstaging.rs @@ -19,6 +19,9 @@ use polkadot_primitives::v2::{CandidateHash, Hash, Id as ParaId}; use super::{IsRequest, Protocol}; +/// Responses as sent by collators. +pub type CollationFetchingResponse = super::v1::CollationFetchingResponse; + /// Request the advertised collation at that relay-parent. #[derive(Debug, Clone, Encode, Decode)] pub struct CollationFetchingRequest { @@ -32,6 +35,6 @@ pub struct CollationFetchingRequest { impl IsRequest for CollationFetchingRequest { // The response is the same as for V1. 
- type Response = super::v1::CollationFetchingResponse; + type Response = CollationFetchingResponse; const PROTOCOL: Protocol = Protocol::CollationFetchingVStaging; } From 941a03149cf96ca0d2445fbddef81f91bb0e3ae5 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Mon, 3 Oct 2022 22:36:15 +0300 Subject: [PATCH 31/45] more improvements --- .../src/collator_side/collation.rs | 5 ++ .../src/collator_side/mod.rs | 52 +++++++++----- .../tests/prospective_parachains.rs | 4 +- node/network/collator-protocol/src/lib.rs | 6 +- .../src/validator_side/collation.rs | 14 ++++ .../src/validator_side/mod.rs | 69 ++++++++++--------- .../tests/prospective_parachains.rs | 4 +- node/network/protocol/src/lib.rs | 2 +- 8 files changed, 98 insertions(+), 58 deletions(-) diff --git a/node/network/collator-protocol/src/collator_side/collation.rs b/node/network/collator-protocol/src/collator_side/collation.rs index b520c9fda55d..26a8a92d957b 100644 --- a/node/network/collator-protocol/src/collator_side/collation.rs +++ b/node/network/collator-protocol/src/collator_side/collation.rs @@ -135,6 +135,11 @@ impl VersionedCollationRequest { } } +/// Result of the finished background send-collation task. +/// +/// Note that if the timeout was hit the request doesn't get +/// aborted, it only indicates that we should start processing +/// the next one from the queue. 
pub struct CollationSendResult { pub relay_parent: Hash, pub candidate_hash: CandidateHash, diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index 833000837efe..dfab31b6d726 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -324,7 +324,8 @@ async fn distribute_collation( gum::debug!( target: LOG_TARGET, ?candidate_relay_parent, - "Already seen collation for this relay parent", + ?candidate_hash, + "Already seen this candidate", ); return Ok(()) } @@ -394,6 +395,9 @@ async fn distribute_collation( Collation { receipt, parent_head_data_hash, pov, status: CollationStatus::Created }, ); + // If prospective parachains are disabled, a leaf should be known to peer. + // Otherwise, it should be present in allowed ancestry of some leaf. + // // It's collation-producer responsibility to verify that there exists // a hypothetical membership in a fragment tree for candidate. let interested = @@ -503,6 +507,8 @@ async fn determine_our_validators( Ok(current_validators) } +/// Construct the declare message to be sent to validator depending on its +/// network protocol version. fn declare_message( state: &mut State, version: CollationVersion, @@ -534,7 +540,7 @@ fn declare_message( }) } -/// Issue a `Declare` collation message to the given `peer`. +/// Issue versioned `Declare` collation message to the given `peer`. #[overseer::contextbounds(CollatorProtocol, prefix = self::overseer)] async fn declare( ctx: &mut Context, @@ -568,8 +574,10 @@ async fn connect_to_validators( /// Advertise collation to the given `peer`. /// -/// This will only advertise a collation if there exists one for the given `relay_parent` and the given `peer` is -/// set as validator for our para at the given `relay_parent`. 
+/// This will only advertise a collation if there exists at least one for the given +/// `relay_parent` and the given `peer` is set as validator for our para at the given `relay_parent`. +/// +/// We also make sure not to advertise the same collation multiple times to the same validator. #[overseer::contextbounds(CollatorProtocol, prefix = self::overseer)] async fn advertise_collation( ctx: &mut Context, @@ -581,6 +589,19 @@ async fn advertise_collation( metrics: &Metrics, ) { for (candidate_hash, collation) in per_relay_parent.collations.iter_mut() { + // Check that peer will be able to request the collation. + if let CollationVersion::V1 = protocol_version { + if per_relay_parent.prospective_parachains_mode.is_enabled() { + gum::debug!( + target: LOG_TARGET, + ?relay_parent, + peer_id = %peer, + "Skipping advertisement to validator, incorrect network protocol version", + ); + return + } + } + let should_advertise = per_relay_parent .validator_group @@ -616,15 +637,6 @@ async fn advertise_collation( )) }, CollationVersion::V1 => { - if per_relay_parent.prospective_parachains_mode.is_enabled() { - gum::warn!( - target: LOG_TARGET, - ?relay_parent, - peer_id = %peer, - "Skipping advertisement to validator, incorrect network protocol version", - ); - return - } let wire_message = protocol_v1::CollatorProtocolMessage::AdvertiseCollation(relay_parent); Versioned::V1(protocol_v1::CollationProtocol::CollatorProtocol(wire_message)) @@ -737,6 +749,8 @@ async fn send_collation( let peer_id = request.peer_id(); let candidate_hash = receipt.hash(); + // The response payload is the same for both versions of protocol + // and doesn't have vstaging alias for simplicity. let response = OutgoingResponse { result: Ok(request_v1::CollationFetchingResponse::Collation(receipt, pov)), reputation_changes: Vec::new(), @@ -949,7 +963,8 @@ async fn handle_incoming_request( Ok(()) } -/// Our view has changed. +/// Peer's view has changed. 
Send advertisements for new relay parents +/// if there're any. #[overseer::contextbounds(CollatorProtocol, prefix = self::overseer)] async fn handle_peer_view_change( ctx: &mut Context, @@ -1207,7 +1222,8 @@ pub(crate) async fn run( FromOrchestra::Signal(BlockFinalized(..)) => {} FromOrchestra::Signal(Conclude) => return Ok(()), }, - CollationSendResult { relay_parent, candidate_hash, peer_id, timed_out } = state.active_collation_fetches.select_next_some() => { + CollationSendResult { relay_parent, candidate_hash, peer_id, timed_out } = + state.active_collation_fetches.select_next_some() => { let next = if let Some(waiting) = state.waiting_collation_fetches.get_mut(&relay_parent) { if timed_out { gum::debug!( @@ -1217,8 +1233,10 @@ pub(crate) async fn run( ?candidate_hash, "Sending collation to validator timed out, carrying on with next validator." ); - // Drop all requests from slow peer. - waiting.req_queue.retain(|req| req.peer_id() != peer_id); + // We try to throttle requests per relay parent to give validators + // more bandwidth, but if the collation is not received within the + // timeout, we simply start processing next request. + // The request is still alive, it should be kept in a waiting queue.
} else { waiting.waiting_peers.remove(&(peer_id, candidate_hash)); } diff --git a/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs b/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs index 67cdef1fed34..d1052afd3f9b 100644 --- a/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs +++ b/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs @@ -21,8 +21,6 @@ use super::*; use polkadot_node_subsystem::messages::{ChainApiMessage, ProspectiveParachainsMessage}; use polkadot_primitives::v2::{Header, OccupiedCore}; -const API_VERSION_PROSPECTIVE_ENABLED: u32 = 3; - const ALLOWED_ANCESTRY: u32 = 3; fn get_parent_hash(hash: Hash) -> Hash { @@ -55,7 +53,7 @@ async fn update_view( parent, RuntimeApiRequest::Version(tx), )) => { - tx.send(Ok(API_VERSION_PROSPECTIVE_ENABLED)).unwrap(); + tx.send(Ok(RuntimeApiRequest::VALIDITY_CONSTRAINTS)).unwrap(); (parent, new_view.get(&parent).copied().expect("Unknown parent requested")) } ); diff --git a/node/network/collator-protocol/src/lib.rs b/node/network/collator-protocol/src/lib.rs index 22eef5240658..c608aed8cefd 100644 --- a/node/network/collator-protocol/src/lib.rs +++ b/node/network/collator-protocol/src/lib.rs @@ -203,13 +203,13 @@ where .map_err(error::Error::CancelledRuntimeApiVersion)? .map_err(error::Error::RuntimeApi)?; - if version == 3 { + if version >= RuntimeApiRequest::VALIDITY_CONSTRAINTS { Ok(ProspectiveParachainsMode::Enabled) } else { - if version != 2 { + if version < 2 { gum::warn!( target: LOG_TARGET, - "Runtime API version is {}, expected 2 or 3. Prospective parachains are disabled", + "Runtime API version is {}, it is expected to be at least 2. 
Prospective parachains are disabled", version ); } diff --git a/node/network/collator-protocol/src/validator_side/collation.rs b/node/network/collator-protocol/src/validator_side/collation.rs index 5516df913556..2a181c5a50f9 100644 --- a/node/network/collator-protocol/src/validator_side/collation.rs +++ b/node/network/collator-protocol/src/validator_side/collation.rs @@ -14,6 +14,19 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . +//! Primitives for tracking collations-related data. +//! +//! Usually a path of collations is as follows: +//! 1. First, collation must be advertised by collator. +//! 2. If the advertisement was accepted, it's queued for fetch (per relay parent). +//! 3. Once it's requested, the collation is said to be Pending. +//! 4. Pending collation becomes Fetched once received, we send it to backing for validation. +//! 5. If it turns to be invalid or async backing allows seconding another candidate, carry on with +//! the next advertisement, otherwise we're done with this relay parent. +//! +//! ┌──────────────────────────────────────────┐ +//! └─▶Advertised ─▶ Pending ─▶ Fetched ─▶ Validated + use futures::channel::oneshot; use std::collections::VecDeque; @@ -57,6 +70,7 @@ impl From<&CandidateReceipt> for FetchedCollation { } } +/// Identifier of a collation being requested. #[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)] pub struct PendingCollation { pub relay_parent: Hash, diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index 9952f1564136..9a9b80b39090 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -120,6 +120,9 @@ const CHECK_COLLATIONS_POLL: Duration = Duration::from_millis(5); struct PerRequest { /// Responses from collator. 
+ /// + /// The response payload is the same for both versions of protocol + /// and doesn't have vstaging alias for simplicity. from_collator: Fuse>>, /// Sender to forward to initial requester. @@ -134,11 +137,11 @@ struct PerRequest { struct CollatingPeerState { collator_id: CollatorId, para_id: ParaId, - // Collations advertised by peer per relay parent. - // - // V1 network protocol doesn't include candidate hash in - // advertisements, we store an empty set in this case to occupy - // a slot in map. + /// Collations advertised by peer per relay parent. + /// + /// V1 network protocol doesn't include candidate hash in + /// advertisements, we store an empty set in this case to occupy + /// a slot in map. advertisements: HashMap>, last_active: Instant, } @@ -186,20 +189,20 @@ impl PeerData { let old_view = std::mem::replace(&mut self.view, new_view); if let PeerState::Collating(ref mut peer_state) = self.state { for removed in old_view.difference(&self.view) { - // Only keep advertisements if prospective parachains - // are enabled and the relay parent is a part of allowed - // ancestry. - let relay_parent_mode_enabled = per_relay_parent + // Remove relay parent advertisements if it went out + // of our (implicit) view. 
+ let keep = per_relay_parent .get(removed) - .map_or(false, |s| s.prospective_parachains_mode.is_enabled()); - let keep = relay_parent_mode_enabled && - is_relay_parent_in_implicit_view( - removed, - ProspectiveParachainsMode::Enabled, - implicit_view, - active_leaves, - peer_state.para_id, - ); + .map(|s| { + is_relay_parent_in_implicit_view( + removed, + s.prospective_parachains_mode, + implicit_view, + active_leaves, + peer_state.para_id, + ) + }) + .unwrap_or(false); if !keep { peer_state.advertisements.remove(&removed); @@ -455,10 +458,7 @@ fn is_relay_parent_in_implicit_view( para_id: ParaId, ) -> bool { match relay_parent_mode { - ProspectiveParachainsMode::Disabled => { - // The head is known and async backing is disabled => it is an active leaf. - true - }, + ProspectiveParachainsMode::Disabled => active_leaves.contains_key(relay_parent), ProspectiveParachainsMode::Enabled => active_leaves.iter().any(|(hash, mode)| { mode.is_enabled() && implicit_view @@ -724,7 +724,7 @@ async fn request_collation( peer_id = %peer_id, para_id = %para_id, relay_parent = %relay_parent, - "Collation relay parent is out of view", + "Collation relay parent is unknown", ); return false }, @@ -954,21 +954,22 @@ where #[derive(Debug)] enum AdvertisementError { /// Relay parent is unknown. - RelayParentOutOfView, + RelayParentUnknown, /// Peer is not present in the subsystem state. UnknownPeer, /// Peer has not declared its para id. UndeclaredCollator, /// We're assigned to a different para at the given relay parent. InvalidAssignment, - /// Collator is trying to build on top of occupied core. + /// Collator is trying to build on top of occupied core + /// when async backing is disabled. CoreOccupied, /// An advertisement format doesn't match the relay parent. ProtocolMismatch, /// Para reached a limit of seconded candidates for this relay parent. SecondedLimitReached, - /// Failed to insert an advertisement. 
- FailedToInsert(InsertAdvertisementError), + /// Advertisement is invalid. + Invalid(InsertAdvertisementError), /// Failed to query prospective parachains subsystem. ProspectiveParachainsUnavailable, } @@ -978,7 +979,7 @@ impl AdvertisementError { use AdvertisementError::*; match self { InvalidAssignment => Some(COST_WRONG_PARA), - RelayParentOutOfView | UndeclaredCollator | CoreOccupied | FailedToInsert(_) => + RelayParentUnknown | UndeclaredCollator | CoreOccupied | Invalid(_) => Some(COST_UNEXPECTED_MESSAGE), UnknownPeer | ProtocolMismatch | @@ -1006,7 +1007,7 @@ where let per_relay_parent = state .per_relay_parent .get_mut(&relay_parent) - .ok_or(AdvertisementError::RelayParentOutOfView)?; + .ok_or(AdvertisementError::RelayParentUnknown)?; let relay_parent_mode = per_relay_parent.prospective_parachains_mode; let assignment = per_relay_parent.assignment; @@ -1104,6 +1105,7 @@ where peer_id = ?peer_id, %para_id, ?relay_parent, + ?relay_parent_mode, "A collation has already been seconded", ); }, @@ -1113,7 +1115,7 @@ where // Checked above. return Err(AdvertisementError::ProtocolMismatch) }, - Err(error) => return Err(AdvertisementError::FailedToInsert(error)), + Err(error) => return Err(AdvertisementError::Invalid(error)), } Ok(()) @@ -1330,7 +1332,12 @@ async fn process_msg( let receipt = match stmt.payload() { Statement::Seconded(receipt) => receipt, Statement::Valid(_) => { - // Seconded statement expected. 
+ gum::warn!( + target: LOG_TARGET, + ?stmt, + relay_parent = %parent, + "Seconded message received with a `Valid` statement", + ); return }, }; diff --git a/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs b/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs index 5107a5df70a6..a40474ac3c9c 100644 --- a/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs +++ b/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs @@ -24,8 +24,6 @@ use polkadot_primitives::v2::{ ValidatorId, }; -const API_VERSION_PROSPECTIVE_ENABLED: u32 = 3; - const ALLOWED_ANCESTRY: u32 = 3; fn get_parent_hash(hash: Hash) -> Hash { @@ -100,7 +98,7 @@ async fn update_view( parent, RuntimeApiRequest::Version(tx), )) => { - tx.send(Ok(API_VERSION_PROSPECTIVE_ENABLED)).unwrap(); + tx.send(Ok(RuntimeApiRequest::VALIDITY_CONSTRAINTS)).unwrap(); (parent, new_view.get(&parent).copied().expect("Unknown parent requested")) } ); diff --git a/node/network/protocol/src/lib.rs b/node/network/protocol/src/lib.rs index 9221b6e3f785..7e08d22697f5 100644 --- a/node/network/protocol/src/lib.rs +++ b/node/network/protocol/src/lib.rs @@ -592,7 +592,7 @@ pub mod vstaging { use parity_scale_codec::{Decode, Encode}; use polkadot_primitives::vstaging::{ - CandidateIndex, CollatorId, CollatorSignature, Hash, Id as ParaId, + CandidateHash, CandidateIndex, CollatorId, CollatorSignature, Hash, Id as ParaId, UncheckedSignedAvailabilityBitfield, }; From 50de99bb9345e72834b3a572ba64f9406e281495 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Wed, 5 Oct 2022 14:18:29 +0300 Subject: [PATCH 32/45] style fixes --- .../collator-protocol/src/collator_side/collation.rs | 12 ++++++++++++ node/network/collator-protocol/src/lib.rs | 5 ----- .../src/validator_side/collation.rs | 12 ++++++++++++ .../protocol/src/request_response/outgoing.rs | 7 ++++--- node/overseer/src/lib.rs | 2 +- node/service/src/overseer.rs | 8 
+++++++- node/subsystem-types/src/messages.rs | 2 +- 7 files changed, 37 insertions(+), 11 deletions(-) diff --git a/node/network/collator-protocol/src/collator_side/collation.rs b/node/network/collator-protocol/src/collator_side/collation.rs index 26a8a92d957b..36cdc7794b68 100644 --- a/node/network/collator-protocol/src/collator_side/collation.rs +++ b/node/network/collator-protocol/src/collator_side/collation.rs @@ -58,9 +58,13 @@ impl CollationStatus { /// A collation built by the collator. pub struct Collation { + /// Candidate receipt. pub receipt: CandidateReceipt, + /// Parent head-data hash. pub parent_head_data_hash: Hash, + /// Proof to verify the state transition of the parachain. pub pov: PoV, + /// Collation status. pub status: CollationStatus, } @@ -103,6 +107,7 @@ impl From> } impl VersionedCollationRequest { + /// Returns parachain id from the request payload. pub fn para_id(&self) -> ParaId { match self { VersionedCollationRequest::V1(req) => req.payload.para_id, @@ -110,6 +115,7 @@ impl VersionedCollationRequest { } } + /// Returns relay parent from the request payload. pub fn relay_parent(&self) -> Hash { match self { VersionedCollationRequest::V1(req) => req.payload.relay_parent, @@ -117,6 +123,7 @@ impl VersionedCollationRequest { } } + /// Returns id of the peer the request was received from. pub fn peer_id(&self) -> PeerId { match self { VersionedCollationRequest::V1(req) => req.peer, @@ -124,6 +131,7 @@ impl VersionedCollationRequest { } } + /// Sends the response back to requester. pub fn send_outgoing_response( self, response: OutgoingResponse, @@ -141,9 +149,13 @@ impl VersionedCollationRequest { /// aborted, it only indicates that we should start processing /// the next one from the queue. pub struct CollationSendResult { + /// Candidate's relay parent. pub relay_parent: Hash, + /// Candidate hash. pub candidate_hash: CandidateHash, + /// Peer id. pub peer_id: PeerId, + /// Whether the max unshared timeout was hit. 
pub timed_out: bool, } diff --git a/node/network/collator-protocol/src/lib.rs b/node/network/collator-protocol/src/lib.rs index c608aed8cefd..cf941b69c617 100644 --- a/node/network/collator-protocol/src/lib.rs +++ b/node/network/collator-protocol/src/lib.rs @@ -188,11 +188,6 @@ async fn prospective_parachains_mode( where Sender: polkadot_node_subsystem::CollatorProtocolSenderTrait, { - // TODO: call a Runtime API once staging version is available - // https://github.com/paritytech/substrate/discussions/11338 - // - // Implementation should be shared with backing & provisioner. - let (tx, rx) = futures::channel::oneshot::channel(); sender .send_message(RuntimeApiMessage::Request(leaf_hash, RuntimeApiRequest::Version(tx))) diff --git a/node/network/collator-protocol/src/validator_side/collation.rs b/node/network/collator-protocol/src/validator_side/collation.rs index 2a181c5a50f9..ae4a17c52d80 100644 --- a/node/network/collator-protocol/src/validator_side/collation.rs +++ b/node/network/collator-protocol/src/validator_side/collation.rs @@ -39,7 +39,9 @@ use crate::{ProspectiveParachainsMode, LOG_TARGET, MAX_CANDIDATE_DEPTH}; /// Candidate supplied with a para head it's built on top of. #[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)] pub struct ProspectiveCandidate { + /// Candidate hash. pub candidate_hash: CandidateHash, + /// Parent head-data hash as supplied in advertisement. pub parent_head_data_hash: Hash, } @@ -52,9 +54,13 @@ impl ProspectiveCandidate { /// Identifier of a fetched collation. #[derive(Debug, Clone, Hash, Eq, PartialEq)] pub struct FetchedCollation { + /// Candidate's relay parent. pub relay_parent: Hash, + /// Parachain id. pub para_id: ParaId, + /// Candidate hash. pub candidate_hash: CandidateHash, + /// Id of the collator the collation was fetched from. pub collator_id: CollatorId, } @@ -73,10 +79,16 @@ impl From<&CandidateReceipt> for FetchedCollation { /// Identifier of a collation being requested. 
#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)] pub struct PendingCollation { + /// Candidate's relay parent. pub relay_parent: Hash, + /// Parachain id. pub para_id: ParaId, + /// Peer that advertised this collation. pub peer_id: PeerId, + /// Optional candidate hash and parent head-data hash if they were + /// supplied in advertisement. pub prospective_candidate: Option, + /// Hash of the candidate's commitments. pub commitments_hash: Option, } diff --git a/node/network/protocol/src/request_response/outgoing.rs b/node/network/protocol/src/request_response/outgoing.rs index 76a0b509ab60..e01111d1cf12 100644 --- a/node/network/protocol/src/request_response/outgoing.rs +++ b/node/network/protocol/src/request_response/outgoing.rs @@ -32,9 +32,6 @@ pub enum Requests { ChunkFetchingV1(OutgoingRequest), /// Fetch a collation from a collator which previously announced it. CollationFetchingV1(OutgoingRequest), - /// Fetch a collation from a collator which previously announced it. - /// Compared to V1 it requires specifying which candidate is requested by its hash. - CollationFetchingVStaging(OutgoingRequest), /// Fetch a PoV from a validator which previously sent out a seconded statement. PoVFetchingV1(OutgoingRequest), /// Request full available data from a node. @@ -43,6 +40,10 @@ pub enum Requests { StatementFetchingV1(OutgoingRequest), /// Requests for notifying about an ongoing dispute. DisputeSendingV1(OutgoingRequest), + + /// Fetch a collation from a collator which previously announced it. + /// Compared to V1 it requires specifying which candidate is requested by its hash.
+ CollationFetchingVStaging(OutgoingRequest), } impl Requests { diff --git a/node/overseer/src/lib.rs b/node/overseer/src/lib.rs index b6cdafd50cf1..188dae751101 100644 --- a/node/overseer/src/lib.rs +++ b/node/overseer/src/lib.rs @@ -555,7 +555,7 @@ pub struct Overseer { RuntimeApiMessage, CandidateBackingMessage, ChainApiMessage, - ProspectiveParachainsMessage + ProspectiveParachainsMessage, ])] collator_protocol: CollatorProtocol, diff --git a/node/service/src/overseer.rs b/node/service/src/overseer.rs index 3f369531ffc8..ef7c8de74e9c 100644 --- a/node/service/src/overseer.rs +++ b/node/service/src/overseer.rs @@ -95,15 +95,21 @@ where pub network_service: Arc>, /// Underlying authority discovery service. pub authority_discovery_service: AuthorityDiscoveryService, - /// POV request receiver + /// POV request receiver. pub pov_req_receiver: IncomingRequestReceiver, + /// Erasure chunks request receiver. pub chunk_req_receiver: IncomingRequestReceiver, + /// Collations request receiver for network protocol v1. pub collation_req_v1_receiver: IncomingRequestReceiver, + /// Collations request receiver for network protocol vstaging. pub collation_req_vstaging_receiver: IncomingRequestReceiver, + /// Receiver for available data requests. pub available_data_req_receiver: IncomingRequestReceiver, + /// Receiver for incoming large statement requests. pub statement_req_receiver: IncomingRequestReceiver, + /// Receiver for incoming disputes. pub dispute_req_receiver: IncomingRequestReceiver, /// Prometheus registry, commonly used for production systems, less so for test. pub registry: Option<&'a Registry>, diff --git a/node/subsystem-types/src/messages.rs b/node/subsystem-types/src/messages.rs index 1e40b6a35834..a63b624081be 100644 --- a/node/subsystem-types/src/messages.rs +++ b/node/subsystem-types/src/messages.rs @@ -195,7 +195,7 @@ pub enum CollatorProtocolMessage { /// This should be sent before any `DistributeCollation` message. 
CollateOn(ParaId), /// Provide a collation to distribute to validators with an optional result sender. - /// The second argument is the hash of parachain head before candidate's execution. + /// The second argument is the parent head-data hash. /// /// The result sender should be informed when at least one parachain validator seconded the collation. It is also /// completely okay to just drop the sender. From 6baac29bf71de0ed3a352ab8c5dc5d92c56887d5 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Wed, 5 Oct 2022 14:20:11 +0300 Subject: [PATCH 33/45] downgrade log --- node/network/collator-protocol/src/collator_side/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index dfab31b6d726..6e63c21c9630 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -592,11 +592,11 @@ async fn advertise_collation( // Check that peer will be able to request the collation. 
if let CollationVersion::V1 = protocol_version { if per_relay_parent.prospective_parachains_mode.is_enabled() { - gum::debug!( + gum::trace!( target: LOG_TARGET, ?relay_parent, peer_id = %peer, - "Skipping advertisement to validator, incorrect network protocol version", + "Skipping advertising to validator, incorrect network protocol version", ); return } From 8acca051a41ee1d10ec2894cdb65ce650f5f22af Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Wed, 5 Oct 2022 21:29:48 +0300 Subject: [PATCH 34/45] Track implicit assignments --- .../src/validator_side/mod.rs | 90 +++++++++++++------ .../tests/prospective_parachains.rs | 26 ++++++ 2 files changed, 90 insertions(+), 26 deletions(-) diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index 9a9b80b39090..ca49bf347b3b 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -372,9 +372,19 @@ impl AssignedCoreState { } } -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Clone)] struct GroupAssignments { + /// Current assignment. current: Option<(ParaId, AssignedCoreState)>, + /// Paras we're implicitly assigned to with respect to ancestry. + /// This only includes paras from children relay chain blocks assignments. + /// + /// Implicit assignments are not reference-counted since they're accumulated + /// from the most recent leaf. + /// + /// Should be relatively small depending on the group rotation frequency and + /// allowed ancestry length. 
+ implicit: Vec, } struct PerRelayParent { @@ -387,7 +397,7 @@ impl PerRelayParent { fn new(mode: ProspectiveParachainsMode) -> Self { Self { prospective_parachains_mode: mode, - assignment: GroupAssignments { current: None }, + assignment: GroupAssignments { current: None, implicit: Vec::new() }, collations: Collations::default(), } } @@ -536,7 +546,7 @@ where } } - *group_assignment = GroupAssignments { current: para_now }; + *group_assignment = GroupAssignments { current: para_now, implicit: Vec::new() }; Ok(()) } @@ -545,7 +555,7 @@ fn remove_outgoing( current_assignments: &mut HashMap, per_relay_parent: PerRelayParent, ) { - let GroupAssignments { current } = per_relay_parent.assignment; + let GroupAssignments { current, .. } = per_relay_parent.assignment; if let Some((cur, _)) = current { if let Entry::Occupied(mut occupied) = current_assignments.entry(cur) { @@ -1010,20 +1020,27 @@ where .ok_or(AdvertisementError::RelayParentUnknown)?; let relay_parent_mode = per_relay_parent.prospective_parachains_mode; - let assignment = per_relay_parent.assignment; + let assignment = &per_relay_parent.assignment; let peer_data = state.peer_data.get_mut(&peer_id).ok_or(AdvertisementError::UnknownPeer)?; - let para_id = peer_data.collating_para().ok_or(AdvertisementError::UndeclaredCollator)?; - - let core_state = match assignment.current { - Some((id, core_state)) if id == para_id => core_state, + let collator_para_id = + peer_data.collating_para().ok_or(AdvertisementError::UndeclaredCollator)?; + + match assignment.current { + Some((id, core_state)) if id == collator_para_id => { + // Disallow building on top occupied core if async + // backing is disabled. + if !relay_parent_mode.is_enabled() && core_state.is_occupied() { + return Err(AdvertisementError::CoreOccupied) + } + }, + _ if assignment.implicit.contains(&collator_para_id) => { + // This relay parent is a part of implicit ancestry, + // thus async backing is enabled. 
+ }, _ => return Err(AdvertisementError::InvalidAssignment), }; - if !relay_parent_mode.is_enabled() && core_state.is_occupied() { - return Err(AdvertisementError::CoreOccupied) - } - // TODO: only fetch a collation if it's built on top of backed nodes in fragment tree. // https://github.com/paritytech/polkadot/issues/5923 let is_seconding_allowed = match (relay_parent_mode, prospective_candidate) { @@ -1035,7 +1052,7 @@ where relay_parent, candidate_hash, parent_head_data_hash, - para_id, + collator_para_id, active_leaves, ) .await @@ -1155,6 +1172,9 @@ where .await?; state.active_leaves.insert(*leaf, mode); + + let mut implicit_assignment = + Vec::from_iter(per_relay_parent.assignment.current.map(|(para, _)| para)); state.per_relay_parent.insert(*leaf, per_relay_parent); if mode.is_enabled() { @@ -1164,25 +1184,43 @@ where .await .map_err(Error::ImplicitViewFetchError)?; + // Order is always descending. let allowed_ancestry = state .implicit_view .known_allowed_relay_parents_under(leaf, None) .unwrap_or_default(); for block_hash in allowed_ancestry { - if let Entry::Vacant(entry) = state.per_relay_parent.entry(*block_hash) { - let mut per_relay_parent = - PerRelayParent::new(ProspectiveParachainsMode::Enabled); - assign_incoming( - sender, - &mut per_relay_parent.assignment, - &mut state.current_assignments, - keystore, - *block_hash, - ) - .await?; + let entry = match state.per_relay_parent.entry(*block_hash) { + Entry::Vacant(entry) => { + let mut per_relay_parent = + PerRelayParent::new(ProspectiveParachainsMode::Enabled); + assign_incoming( + sender, + &mut per_relay_parent.assignment, + &mut state.current_assignments, + keystore, + *block_hash, + ) + .await?; + + entry.insert(per_relay_parent) + }, + Entry::Occupied(entry) => entry.into_mut(), + }; - entry.insert(per_relay_parent); + let current = entry.assignment.current.map(|(para, _)| para); + let implicit = &mut entry.assignment.implicit; + + // Extend implicitly assigned parachains. 
+ for para in &implicit_assignment { + if !implicit.contains(para) { + implicit.push(*para); + } } + // Current assignment propagates to parents, meaning that a parachain + // we're assigned to in fresh blocks can submit collations built + // on top of relay parents in the allowed ancestry, but not vice versa. + implicit_assignment.extend(current); } } } diff --git a/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs b/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs index a40474ac3c9c..f85e846889e9 100644 --- a/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs +++ b/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs @@ -325,6 +325,32 @@ fn accept_advertisements_from_implicit_view() { let candidate_hash = CandidateHash::default(); let parent_head_data_hash = Hash::zero(); + advertise_collation( + &mut virtual_overseer, + peer_b, + head_c, + Some((candidate_hash, parent_head_data_hash)), + ) + .await; + + let response_channel = assert_fetch_collation_request( + &mut virtual_overseer, + head_c, + test_state.chain_ids[1], + Some(candidate_hash), + ) + .await; + + // Respond with an error to abort seconding. + response_channel + .send(Err(sc_network::RequestFailure::NotConnected)) + .expect("Sending response should succeed"); + assert_matches!( + overseer_recv(&mut virtual_overseer).await, + AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::ReportPeer(..),) + ); + + // Advertise with different para. 
advertise_collation( &mut virtual_overseer, peer_a, From c07c0463df45a76e3d534be9cafd8b36e8591222 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Wed, 5 Oct 2022 21:40:23 +0300 Subject: [PATCH 35/45] Limit the number of seconded candidates per para --- .../src/validator_side/collation.rs | 30 +++++++++++-------- .../src/validator_side/mod.rs | 6 ++-- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/node/network/collator-protocol/src/validator_side/collation.rs b/node/network/collator-protocol/src/validator_side/collation.rs index ae4a17c52d80..29e32e318e5e 100644 --- a/node/network/collator-protocol/src/validator_side/collation.rs +++ b/node/network/collator-protocol/src/validator_side/collation.rs @@ -28,7 +28,7 @@ //! └─▶Advertised ─▶ Pending ─▶ Fetched ─▶ Validated use futures::channel::oneshot; -use std::collections::VecDeque; +use std::collections::{HashMap, VecDeque}; use polkadot_node_network_protocol::PeerId; use polkadot_node_primitives::PoV; @@ -160,14 +160,14 @@ pub struct Collations { pub fetching_from: Option, /// Collation that were advertised to us, but we did not yet fetch. pub waiting_queue: VecDeque<(PendingCollation, CollatorId)>, - /// How many collations have been seconded. - pub seconded_count: usize, + /// How many collations have been seconded per parachain. + pub seconded_count: HashMap, } impl Collations { /// Note a seconded collation for a given para. - pub(super) fn note_seconded(&mut self) { - self.seconded_count += 1 + pub(super) fn note_seconded(&mut self, para_id: ParaId) { + *self.seconded_count.entry(para_id).or_insert(0) += 1 } /// Returns the next collation to fetch from the `waiting_queue`. @@ -197,12 +197,17 @@ impl Collations { match self.status { // We don't need to fetch any other collation when we already have seconded one. 
CollationStatus::Seconded => None, - CollationStatus::Waiting => - if !self.is_seconded_limit_reached(relay_parent_mode) { - None - } else { - self.waiting_queue.pop_front() - }, + CollationStatus::Waiting => { + while let Some(next) = self.waiting_queue.pop_front() { + let para_id = next.0.para_id; + if !self.is_seconded_limit_reached(relay_parent_mode, para_id) { + continue + } + + return Some(next) + } + None + }, CollationStatus::WaitingOnValidation | CollationStatus::Fetching => unreachable!("We have reset the status above!"), } @@ -212,9 +217,10 @@ impl Collations { pub(super) fn is_seconded_limit_reached( &self, relay_parent_mode: ProspectiveParachainsMode, + para_id: ParaId, ) -> bool { let seconded_limit = if relay_parent_mode.is_enabled() { MAX_CANDIDATE_DEPTH + 1 } else { 1 }; - self.seconded_count < seconded_limit + self.seconded_count.get(&para_id).map_or(true, |&num| num < seconded_limit) } } diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index ca49bf347b3b..1d888807d79a 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -1090,7 +1090,7 @@ where }); let collations = &mut per_relay_parent.collations; - if !collations.is_seconded_limit_reached(relay_parent_mode) { + if !collations.is_seconded_limit_reached(relay_parent_mode, collator_para_id) { return Err(AdvertisementError::SecondedLimitReached) } @@ -1382,7 +1382,7 @@ async fn process_msg( let fetched_collation = FetchedCollation::from(&receipt.to_plain()); if let Some(collation_event) = state.fetched_candidates.remove(&fetched_collation) { let (collator_id, pending_collation) = collation_event; - let PendingCollation { relay_parent, peer_id, .. } = pending_collation; + let PendingCollation { relay_parent, peer_id, para_id, ..
} = pending_collation; note_good_collation(ctx.sender(), &state.peer_data, collator_id.clone()).await; if let Some(peer_data) = state.peer_data.get(&peer_id) { notify_collation_seconded( @@ -1397,7 +1397,7 @@ async fn process_msg( if let Some(state) = state.per_relay_parent.get_mut(&parent) { state.collations.status = CollationStatus::Seconded; - state.collations.note_seconded(); + state.collations.note_seconded(para_id); } // If async backing is enabled, make an attempt to fetch next collation. dequeue_next_collation_and_fetch(ctx, state, parent, collator_id).await; From 358262201657bbee452feea45ba9e952feb491a2 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Wed, 5 Oct 2022 23:27:48 +0300 Subject: [PATCH 36/45] Add a sanity check --- .../src/validator_side/collation.rs | 36 ++++- .../src/validator_side/mod.rs | 27 +++- .../src/validator_side/tests/mod.rs | 16 ++- .../tests/prospective_parachains.rs | 131 ++++++++++++++++-- 4 files changed, 183 insertions(+), 27 deletions(-) diff --git a/node/network/collator-protocol/src/validator_side/collation.rs b/node/network/collator-protocol/src/validator_side/collation.rs index 29e32e318e5e..17c8b6538d6d 100644 --- a/node/network/collator-protocol/src/validator_side/collation.rs +++ b/node/network/collator-protocol/src/validator_side/collation.rs @@ -32,7 +32,9 @@ use std::collections::{HashMap, VecDeque}; use polkadot_node_network_protocol::PeerId; use polkadot_node_primitives::PoV; -use polkadot_primitives::v2::{CandidateHash, CandidateReceipt, CollatorId, Hash, Id as ParaId}; +use polkadot_primitives::v2::{ + CandidateHash, CandidateReceipt, CollatorId, Hash, Id as ParaId, PersistedValidationData, +}; use crate::{ProspectiveParachainsMode, LOG_TARGET, MAX_CANDIDATE_DEPTH}; @@ -109,6 +111,38 @@ impl PendingCollation { } } +/// An error indicating a mismatch between an advertisement +/// and the collation we've received. 
+#[derive(Debug)] +pub enum FetchedCollationMismatchError { + /// Persisted validation data hash doesn't match + /// the one in the candidate receipt. + PersistedValidationDataHash, + /// Candidate receipt hash mismatch. + CandidateHash, +} + +/// Performs a sanity check between advertised and fetched collations. +/// +/// Since the persisted validation data is constructed using the advertised +/// parent head data hash, the latter doesn't require an additional check. +pub fn fetched_collation_sanity_check( + advertised: &PendingCollation, + fetched: &CandidateReceipt, + persisted_validation_data: &PersistedValidationData, +) -> Result<(), FetchedCollationMismatchError> { + if persisted_validation_data.hash() != fetched.descriptor().persisted_validation_data_hash { + Err(FetchedCollationMismatchError::PersistedValidationDataHash) + } else if advertised + .prospective_candidate + .map_or(false, |pc| pc.candidate_hash() != fetched.hash()) + { + Err(FetchedCollationMismatchError::CandidateHash) + } else { + Ok(()) + } +} + pub type CollationEvent = (CollatorId, PendingCollation); pub type PendingCollationFetch = diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index 1d888807d79a..4555bdd9043e 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -72,8 +72,8 @@ mod collation; mod metrics; use collation::{ - CollationEvent, CollationStatus, Collations, FetchedCollation, PendingCollation, - PendingCollationFetch, ProspectiveCandidate, + fetched_collation_sanity_check, CollationEvent, CollationStatus, Collations, FetchedCollation, + PendingCollation, PendingCollationFetch, ProspectiveCandidate, }; #[cfg(test)] @@ -1659,9 +1659,6 @@ async fn handle_collation_fetched_result( }; let collations = &mut per_relay_parent.collations; - // There's always a single collation being fetched at any moment of time. 
- // In case of a failure, we reset the status back to waiting. - collations.status = CollationStatus::WaitingOnValidation; let fetched_collation = FetchedCollation::from(&candidate_receipt); if let Entry::Vacant(entry) = state.fetched_candidates.entry(fetched_collation) { @@ -1706,12 +1703,27 @@ async fn handle_collation_fetched_result( ?relay_parent_mode, candidate = ?candidate_receipt.hash(), "Failed to fetch persisted validation data due to an error: {}", - err + err, ); return }, }; + if let Err(err) = + fetched_collation_sanity_check(&collation_event.1, &candidate_receipt, &pvd) + { + gum::warn!( + target: LOG_TARGET, + ?relay_parent, + ?para_id, + candidate = ?candidate_receipt.hash(), + "Collation sanity check failed with an error: {:?}", + err, + ); + modify_reputation(ctx.sender(), collation_event.1.peer_id, COST_REPORT_BAD).await; + return + } + ctx.send_message(CandidateBackingMessage::Second( relay_parent, candidate_receipt, @@ -1719,6 +1731,9 @@ async fn handle_collation_fetched_result( pov, )) .await; + // There's always a single collation being fetched at any moment of time. + // In case of a failure, we reset the status back to waiting. 
+ collations.status = CollationStatus::WaitingOnValidation; entry.insert(collation_event); } else { diff --git a/node/network/collator-protocol/src/validator_side/tests/mod.rs b/node/network/collator-protocol/src/validator_side/tests/mod.rs index 47b6617cf5e8..088348443409 100644 --- a/node/network/collator-protocol/src/validator_side/tests/mod.rs +++ b/node/network/collator-protocol/src/validator_side/tests/mod.rs @@ -47,6 +47,15 @@ const DECLARE_TIMEOUT: Duration = Duration::from_millis(25); const API_VERSION_PROSPECTIVE_DISABLED: u32 = 2; +fn dummy_pvd() -> PersistedValidationData { + PersistedValidationData { + parent_head: HeadData(vec![7, 8, 9]), + relay_parent_number: 5, + max_pov_size: 1024, + relay_parent_storage_root: Default::default(), + } +} + #[derive(Clone)] struct TestState { chain_ids: Vec, @@ -253,12 +262,7 @@ async fn assert_candidate_backing_second( expected_pov: &PoV, mode: ProspectiveParachainsMode, ) -> CandidateReceipt { - let pvd = PersistedValidationData { - parent_head: HeadData(vec![7, 8, 9]), - relay_parent_number: 5, - max_pov_size: 1024, - relay_parent_storage_root: Default::default(), - }; + let pvd = dummy_pvd(); // Depending on relay parent mode pvd will be either requested // from the Runtime API or Prospective Parachains. 
diff --git a/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs b/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs index f85e846889e9..7b8ccb5c2b52 100644 --- a/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs +++ b/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs @@ -404,7 +404,21 @@ fn second_multiple_candidates_per_relay_parent() { .await; for i in 0..(MAX_CANDIDATE_DEPTH + 1) { - let candidate_hash = CandidateHash(Hash::repeat_byte(i as u8)); + let mut candidate = dummy_candidate_receipt_bad_sig(head_c, Some(Default::default())); + candidate.descriptor.para_id = test_state.chain_ids[0]; + candidate.descriptor.relay_parent = head_c; + candidate.descriptor.persisted_validation_data_hash = dummy_pvd().hash(); + let commitments = CandidateCommitments { + head_data: HeadData(vec![i as u8]), + horizontal_messages: Vec::new(), + upward_messages: Vec::new(), + new_validation_code: None, + processed_downward_messages: 0, + hrmp_watermark: 0, + }; + candidate.commitments_hash = commitments.hash(); + + let candidate_hash = candidate.hash(); let parent_head_data_hash = Hash::zero(); advertise_collation( @@ -423,19 +437,7 @@ fn second_multiple_candidates_per_relay_parent() { ) .await; - let pov = PoV { block_data: BlockData(vec![]) }; - let mut candidate = dummy_candidate_receipt_bad_sig(head_c, Some(Default::default())); - candidate.descriptor.para_id = test_state.chain_ids[0]; - candidate.descriptor.relay_parent = head_c; - let commitments = CandidateCommitments { - head_data: HeadData(vec![1, 2, 3]), - horizontal_messages: Vec::new(), - upward_messages: Vec::new(), - new_validation_code: None, - processed_downward_messages: 0, - hrmp_watermark: 0, - }; - candidate.commitments_hash = commitments.hash(); + let pov = PoV { block_data: BlockData(vec![1]) }; response_channel .send(Ok(request_vstaging::CollationFetchingResponse::Collation( @@ -512,3 
+514,104 @@ fn second_multiple_candidates_per_relay_parent() { virtual_overseer }); } + +#[test] +fn fetched_collation_sanity_check() { + let test_state = TestState::default(); + + test_harness(|test_harness| async move { + let TestHarness { mut virtual_overseer, .. } = test_harness; + + let pair = CollatorPair::generate().0; + + // Grandparent of head `a`. + let head_b = Hash::from_low_u64_be(128); + let head_b_num: u32 = 2; + + // Grandparent of head `b`. + // Group rotation frequency is 1 by default, at `c` we're assigned + // to the first para. + let head_c = Hash::from_low_u64_be(130); + + // Activated leaf is `b`, but the collation will be based on `c`. + update_view(&mut virtual_overseer, &test_state, vec![(head_b, head_b_num)], 1).await; + + let peer_a = PeerId::random(); + + connect_and_declare_collator( + &mut virtual_overseer, + peer_a, + pair.clone(), + test_state.chain_ids[0], + CollationVersion::VStaging, + ) + .await; + + let mut candidate = dummy_candidate_receipt_bad_sig(head_c, Some(Default::default())); + candidate.descriptor.para_id = test_state.chain_ids[0]; + candidate.descriptor.relay_parent = head_c; + let commitments = CandidateCommitments { + head_data: HeadData(vec![1, 2, 3]), + horizontal_messages: Vec::new(), + upward_messages: Vec::new(), + new_validation_code: None, + processed_downward_messages: 0, + hrmp_watermark: 0, + }; + candidate.commitments_hash = commitments.hash(); + + let candidate_hash = CandidateHash(Hash::zero()); + let parent_head_data_hash = Hash::zero(); + + advertise_collation( + &mut virtual_overseer, + peer_a, + head_c, + Some((candidate_hash, parent_head_data_hash)), + ) + .await; + + let response_channel = assert_fetch_collation_request( + &mut virtual_overseer, + head_c, + test_state.chain_ids[0], + Some(candidate_hash), + ) + .await; + + let pov = PoV { block_data: BlockData(vec![1]) }; + + response_channel + .send(Ok(request_vstaging::CollationFetchingResponse::Collation( + candidate.clone(), + pov.clone(), + 
) + .encode())) + .expect("Sending response should succeed"); + + // PVD request. + assert_matches!( + overseer_recv(&mut virtual_overseer).await, + AllMessages::ProspectiveParachains( + ProspectiveParachainsMessage::GetProspectiveValidationData(request, tx), + ) => { + assert_eq!(head_c, request.candidate_relay_parent); + assert_eq!(test_state.chain_ids[0], request.para_id); + tx.send(Some(dummy_pvd())).unwrap(); + } + ); + + // Reported malicious. + assert_matches!( + overseer_recv(&mut virtual_overseer).await, + AllMessages::NetworkBridgeTx( + NetworkBridgeTxMessage::ReportPeer(peer_id, rep), + ) => { + assert_eq!(peer_a, peer_id); + assert_eq!(rep, COST_REPORT_BAD); + } + ); + + virtual_overseer + }); +} From 67f6d113c471c4823b70af538817c29fe41b5894 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Thu, 6 Oct 2022 16:05:31 +0300 Subject: [PATCH 37/45] Handle fetched candidate --- node/network/collator-protocol/src/error.rs | 43 +++++- .../src/validator_side/collation.rs | 19 +-- .../src/validator_side/mod.rs | 126 +++++++----------- .../tests/prospective_parachains.rs | 17 ++- 4 files changed, 94 insertions(+), 111 deletions(-) diff --git a/node/network/collator-protocol/src/error.rs b/node/network/collator-protocol/src/error.rs index be3ccbc2c27a..812811fd4631 100644 --- a/node/network/collator-protocol/src/error.rs +++ b/node/network/collator-protocol/src/error.rs @@ -55,12 +55,6 @@ pub enum Error { #[error("Response receiver for Runtime API version request cancelled")] CancelledRuntimeApiVersion(oneshot::Canceled), - #[error("Response receiver for persisted validation data request cancelled")] - CancelledRuntimePersistedValidationData(oneshot::Canceled), - - #[error("Response receiver for prospective validation data request cancelled")] - CancelledProspectiveValidationData(oneshot::Canceled), - #[error("Response receiver for active validators request cancelled")] CancelledActiveValidators(oneshot::Canceled), @@ -74,6 +68,43 @@ pub enum Error { 
InvalidStatementSignature(UncheckedSignedFullStatement), } +/// An error happened on the validator side of the protocol when attempting +/// to start seconding a candidate. +#[derive(Debug, thiserror::Error)] +pub enum SecondingError { + #[error("Failed to fetch a collation")] + FailedToFetch(#[from] oneshot::Canceled), + + #[error("Error while accessing Runtime API")] + RuntimeApi(#[from] RuntimeApiError), + + #[error("Response receiver for persisted validation data request cancelled")] + CancelledRuntimePersistedValidationData(oneshot::Canceled), + + #[error("Response receiver for prospective validation data request cancelled")] + CancelledProspectiveValidationData(oneshot::Canceled), + + #[error("Persisted validation data is not available")] + PersistedValidationDataNotFound, + + #[error("Persisted validation data hash doesn't match one in the candidate receipt.")] + PersistedValidationDataMismatch, + + #[error("Candidate hash doesn't match the advertisement")] + CandidateHashMismatch, + + #[error("Received duplicate collation from the peer")] + Duplicate, +} + +impl SecondingError { + /// Returns true if an error indicates that a peer is malicious. + pub fn is_malicious(&self) -> bool { + use SecondingError::*; + matches!(self, PersistedValidationDataMismatch | CandidateHashMismatch | Duplicate) + } +} + /// Utility for eating top level errors and log them. /// /// We basically always want to try and continue on error. 
This utility function is meant to diff --git a/node/network/collator-protocol/src/validator_side/collation.rs b/node/network/collator-protocol/src/validator_side/collation.rs index 17c8b6538d6d..23fe2554c6a3 100644 --- a/node/network/collator-protocol/src/validator_side/collation.rs +++ b/node/network/collator-protocol/src/validator_side/collation.rs @@ -36,7 +36,7 @@ use polkadot_primitives::v2::{ CandidateHash, CandidateReceipt, CollatorId, Hash, Id as ParaId, PersistedValidationData, }; -use crate::{ProspectiveParachainsMode, LOG_TARGET, MAX_CANDIDATE_DEPTH}; +use crate::{error::SecondingError, ProspectiveParachainsMode, LOG_TARGET, MAX_CANDIDATE_DEPTH}; /// Candidate supplied with a para head it's built on top of. #[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)] @@ -111,17 +111,6 @@ impl PendingCollation { } } -/// An error indicating a mismatch between an advertisement -/// and the collation we've received. -#[derive(Debug)] -pub enum FetchedCollationMismatchError { - /// Persisted validation data hash doesn't match - /// the one in the candidate receipt. - PersistedValidationDataHash, - /// Candidate receipt hash mismatch. - CandidateHash, -} - /// Performs a sanity check between advertised and fetched collations. 
/// /// Since the persisted validation data is constructed using the advertised @@ -130,14 +119,14 @@ pub fn fetched_collation_sanity_check( advertised: &PendingCollation, fetched: &CandidateReceipt, persisted_validation_data: &PersistedValidationData, -) -> Result<(), FetchedCollationMismatchError> { +) -> Result<(), SecondingError> { if persisted_validation_data.hash() != fetched.descriptor().persisted_validation_data_hash { - Err(FetchedCollationMismatchError::PersistedValidationDataHash) + Err(SecondingError::PersistedValidationDataMismatch) } else if advertised .prospective_candidate .map_or(false, |pc| pc.candidate_hash() != fetched.hash()) { - Err(FetchedCollationMismatchError::CandidateHash) + Err(SecondingError::CandidateHashMismatch) } else { Ok(()) } diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index 4555bdd9043e..840d31c8a199 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -61,7 +61,7 @@ use polkadot_primitives::v2::{ OccupiedCoreAssumption, PersistedValidationData, }; -use crate::error::{Error, Result}; +use crate::error::{Error, Result, SecondingError}; use super::{ modify_reputation, prospective_parachains_mode, ProspectiveParachainsMode, LOG_TARGET, @@ -1491,7 +1491,23 @@ pub(crate) async fn run( disconnect_inactive_peers(ctx.sender(), &eviction_policy, &state.peer_data).await; } res = state.collation_fetches.select_next_some() => { - handle_collation_fetched_result(&mut ctx, &mut state, res).await; + let (collator_id, pc) = res.0.clone(); + if let Err(err) = kick_off_seconding(&mut ctx, &mut state, res).await { + gum::warn!( + target: LOG_TARGET, + relay_parent = ?pc.relay_parent, + para_id = ?pc.para_id, + peer_id = ?pc.peer_id, + error = %err, + "Seconding aborted due to an error", + ); + + if err.is_malicious() { + // Report malicious peer. 
+ modify_reputation(ctx.sender(), pc.peer_id, COST_REPORT_BAD).await; + } + dequeue_next_collation_and_fetch(&mut ctx, &mut state, pc.relay_parent, collator_id).await; + } } res = state.collation_fetch_timeouts.select_next_some() => { let (collator_id, relay_parent) = res; @@ -1579,7 +1595,7 @@ async fn request_persisted_validation_data( sender: &mut Sender, relay_parent: Hash, para_id: ParaId, -) -> Result> +) -> std::result::Result, SecondingError> where Sender: CollatorProtocolSenderTrait, { @@ -1592,8 +1608,8 @@ where ) .await .await - .map_err(Error::CancelledRuntimePersistedValidationData)? - .map_err(Error::RuntimeApi) + .map_err(SecondingError::CancelledRuntimePersistedValidationData)? + .map_err(SecondingError::RuntimeApi) } async fn request_prospective_validation_data( @@ -1601,7 +1617,7 @@ async fn request_prospective_validation_data( candidate_relay_parent: Hash, parent_head_data_hash: Hash, para_id: ParaId, -) -> Result> +) -> std::result::Result, SecondingError> where Sender: CollatorProtocolSenderTrait, { @@ -1614,115 +1630,67 @@ where .send_message(ProspectiveParachainsMessage::GetProspectiveValidationData(request, tx)) .await; - rx.await.map_err(Error::CancelledProspectiveValidationData) + rx.await.map_err(SecondingError::CancelledProspectiveValidationData) } /// Handle a fetched collation result. #[overseer::contextbounds(CollatorProtocol, prefix = self::overseer)] -async fn handle_collation_fetched_result( +async fn kick_off_seconding( ctx: &mut Context, state: &mut State, (mut collation_event, res): PendingCollationFetch, -) { +) -> std::result::Result<(), SecondingError> { let relay_parent = collation_event.1.relay_parent; let para_id = collation_event.1.para_id; let per_relay_parent = match state.per_relay_parent.get_mut(&relay_parent) { Some(state) => state, None => { + // Relay parent went out of view, not an error. 
gum::trace!( target: LOG_TARGET, relay_parent = ?relay_parent, "Fetched collation for a parent out of view", ); - return + return Ok(()) }, }; + let collations = &mut per_relay_parent.collations; let relay_parent_mode = per_relay_parent.prospective_parachains_mode; - let (candidate_receipt, pov) = match res { - Ok(res) => res, - Err(e) => { - gum::debug!( - target: LOG_TARGET, - relay_parent = ?collation_event.1.relay_parent, - para_id = ?para_id, - peer_id = ?collation_event.1.peer_id, - collator_id = ?collation_event.0, - error = ?e, - "Failed to fetch collation.", - ); - - dequeue_next_collation_and_fetch(ctx, state, relay_parent, collation_event.0).await; - return - }, - }; - - let collations = &mut per_relay_parent.collations; + let (candidate_receipt, pov) = res?; let fetched_collation = FetchedCollation::from(&candidate_receipt); if let Entry::Vacant(entry) = state.fetched_candidates.entry(fetched_collation) { collation_event.1.commitments_hash = Some(candidate_receipt.commitments_hash); - let result = match collation_event.1.prospective_candidate { - Some(ProspectiveCandidate { parent_head_data_hash, .. }) => + let pvd = match (relay_parent_mode, collation_event.1.prospective_candidate) { + ( + ProspectiveParachainsMode::Enabled, + Some(ProspectiveCandidate { parent_head_data_hash, .. 
}), + ) => request_prospective_validation_data( ctx.sender(), relay_parent, parent_head_data_hash, para_id, ) - .await, - None => + .await?, + (ProspectiveParachainsMode::Disabled, _) => request_persisted_validation_data( ctx.sender(), candidate_receipt.descriptor().relay_parent, candidate_receipt.descriptor().para_id, ) - .await, - }; - - let pvd = match result { - Ok(Some(pvd)) => pvd, - Ok(None) => { - gum::warn!( - target: LOG_TARGET, - ?relay_parent, - ?para_id, - ?relay_parent_mode, - candidate = ?candidate_receipt.hash(), - "Persisted validation data isn't available", - ); - return - }, - Err(err) => { - gum::warn!( - target: LOG_TARGET, - ?relay_parent, - ?para_id, - ?relay_parent_mode, - candidate = ?candidate_receipt.hash(), - "Failed to fetch persisted validation data due to an error: {}", - err, - ); - return + .await?, + _ => { + // `handle_advertisement` checks for protocol mismatch. + return Ok(()) }, - }; - - if let Err(err) = - fetched_collation_sanity_check(&collation_event.1, &candidate_receipt, &pvd) - { - gum::warn!( - target: LOG_TARGET, - ?relay_parent, - ?para_id, - candidate = ?candidate_receipt.hash(), - "Collation sanity check failed with an error: {:?}", - err, - ); - modify_reputation(ctx.sender(), collation_event.1.peer_id, COST_REPORT_BAD).await; - return } + .ok_or(SecondingError::PersistedValidationDataNotFound)?; + + fetched_collation_sanity_check(&collation_event.1, &candidate_receipt, &pvd)?; ctx.send_message(CandidateBackingMessage::Second( relay_parent, @@ -1736,13 +1704,9 @@ async fn handle_collation_fetched_result( collations.status = CollationStatus::WaitingOnValidation; entry.insert(collation_event); + Ok(()) } else { - gum::trace!( - target: LOG_TARGET, - ?relay_parent, - candidate = ?candidate_receipt.hash(), - "Trying to insert a pending candidate failed, because there is already one.", - ) + Err(SecondingError::Duplicate) } } diff --git a/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs 
b/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs index 7b8ccb5c2b52..1eccb97cbd67 100644 --- a/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs +++ b/node/network/collator-protocol/src/validator_side/tests/prospective_parachains.rs @@ -332,6 +332,14 @@ fn accept_advertisements_from_implicit_view() { Some((candidate_hash, parent_head_data_hash)), ) .await; + // Advertise with different para. + advertise_collation( + &mut virtual_overseer, + peer_a, + head_c, + Some((candidate_hash, parent_head_data_hash)), + ) + .await; let response_channel = assert_fetch_collation_request( &mut virtual_overseer, @@ -350,15 +358,6 @@ fn accept_advertisements_from_implicit_view() { AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::ReportPeer(..),) ); - // Advertise with different para. - advertise_collation( - &mut virtual_overseer, - peer_a, - head_c, - Some((candidate_hash, parent_head_data_hash)), - ) - .await; - assert_fetch_collation_request( &mut virtual_overseer, head_c, From 4f68a612dc393d149d0cce78431f126cd65cd302 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Thu, 6 Oct 2022 16:09:49 +0300 Subject: [PATCH 38/45] fix tests --- node/network/collator-protocol/src/validator_side/tests/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/node/network/collator-protocol/src/validator_side/tests/mod.rs b/node/network/collator-protocol/src/validator_side/tests/mod.rs index 088348443409..fa59a93c4f56 100644 --- a/node/network/collator-protocol/src/validator_side/tests/mod.rs +++ b/node/network/collator-protocol/src/validator_side/tests/mod.rs @@ -736,6 +736,7 @@ fn fetch_collations_works() { dummy_candidate_receipt_bad_sig(dummy_hash(), Some(Default::default())); candidate_a.descriptor.para_id = test_state.chain_ids[0]; candidate_a.descriptor.relay_parent = test_state.relay_parent; + candidate_a.descriptor.persisted_validation_data_hash = dummy_pvd().hash(); response_channel 
.send(Ok(request_v1::CollationFetchingResponse::Collation( candidate_a.clone(), @@ -837,6 +838,7 @@ fn fetch_collations_works() { dummy_candidate_receipt_bad_sig(dummy_hash(), Some(Default::default())); candidate_a.descriptor.para_id = test_state.chain_ids[0]; candidate_a.descriptor.relay_parent = second; + candidate_a.descriptor.persisted_validation_data_hash = dummy_pvd().hash(); // First request finishes now: response_channel_non_exclusive @@ -978,6 +980,7 @@ fn fetch_next_collation_on_invalid_collation() { dummy_candidate_receipt_bad_sig(dummy_hash(), Some(Default::default())); candidate_a.descriptor.para_id = test_state.chain_ids[0]; candidate_a.descriptor.relay_parent = test_state.relay_parent; + candidate_a.descriptor.persisted_validation_data_hash = dummy_pvd().hash(); response_channel .send(Ok(request_v1::CollationFetchingResponse::Collation( candidate_a.clone(), From 7a527f7acba07719c4f4a1167ac4682c89890a65 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Thu, 6 Oct 2022 17:26:09 +0300 Subject: [PATCH 39/45] Retry fetch --- node/network/collator-protocol/src/error.rs | 19 +++ .../src/validator_side/mod.rs | 113 ++++++------------ 2 files changed, 58 insertions(+), 74 deletions(-) diff --git a/node/network/collator-protocol/src/error.rs b/node/network/collator-protocol/src/error.rs index 812811fd4631..4003ac438c92 100644 --- a/node/network/collator-protocol/src/error.rs +++ b/node/network/collator-protocol/src/error.rs @@ -105,6 +105,25 @@ impl SecondingError { } } +/// A validator failed to request a collation due to an error. 
+#[derive(Debug, thiserror::Error)] +pub enum FetchError { + #[error("Collation was not previously advertised")] + NotAdvertised, + + #[error("Peer is unknown")] + UnknownPeer, + + #[error("Collation was already requested")] + AlreadyRequested, + + #[error("Relay parent went out of view")] + RelayParentOutOfView, + + #[error("Peer's protocol doesn't match the advertisement")] + ProtocolMismatch, +} + /// Utility for eating top level errors and log them. /// /// We basically always want to try and continue on error. This utility function is meant to diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index 840d31c8a199..28d4ad6beba4 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -61,7 +61,7 @@ use polkadot_primitives::v2::{ OccupiedCoreAssumption, PersistedValidationData, }; -use crate::error::{Error, Result, SecondingError}; +use crate::error::{Error, FetchError, Result, SecondingError}; use super::{ modify_reputation, prospective_parachains_mode, ProspectiveParachainsMode, LOG_TARGET, @@ -595,39 +595,26 @@ async fn fetch_collation( state: &mut State, pc: PendingCollation, id: CollatorId, -) { +) -> std::result::Result<(), FetchError> { let (tx, rx) = oneshot::channel(); - let PendingCollation { relay_parent, para_id, peer_id, prospective_candidate, .. } = pc; + let PendingCollation { relay_parent, peer_id, prospective_candidate, .. 
} = pc; let candidate_hash = prospective_candidate.as_ref().map(ProspectiveCandidate::candidate_hash); - if let Some(peer_data) = state.peer_data.get(&peer_id) { - if peer_data.has_advertised(&relay_parent, candidate_hash) { - if request_collation(sender, state, pc, id.clone(), peer_data.version, tx).await { - let timeout = |collator_id, relay_parent| async move { - Delay::new(MAX_UNSHARED_DOWNLOAD_TIME).await; - (collator_id, relay_parent) - }; - state.collation_fetch_timeouts.push(timeout(id.clone(), relay_parent).boxed()); - state.collation_fetches.push(rx.map(move |r| ((id, pc), r)).boxed()); - } - } else { - gum::debug!( - target: LOG_TARGET, - ?peer_id, - ?para_id, - ?relay_parent, - "Collation is not advertised for the relay parent by the peer, do not request it", - ); - } + let peer_data = state.peer_data.get(&peer_id).ok_or(FetchError::UnknownPeer)?; + + if peer_data.has_advertised(&relay_parent, candidate_hash) { + request_collation(sender, state, pc, id.clone(), peer_data.version, tx).await?; + let timeout = |collator_id, relay_parent| async move { + Delay::new(MAX_UNSHARED_DOWNLOAD_TIME).await; + (collator_id, relay_parent) + }; + state.collation_fetch_timeouts.push(timeout(id.clone(), relay_parent).boxed()); + state.collation_fetches.push(rx.map(move |r| ((id, pc), r)).boxed()); + + Ok(()) } else { - gum::warn!( - target: LOG_TARGET, - ?peer_id, - ?para_id, - ?relay_parent, - "Requested to fetch a collation from an unknown peer", - ); + Err(FetchError::NotAdvertised) } } @@ -712,33 +699,17 @@ async fn request_collation( collator_id: CollatorId, peer_protocol_version: CollationVersion, result: oneshot::Sender<(CandidateReceipt, PoV)>, -) -> bool { +) -> std::result::Result<(), FetchError> { if state.requested_collations.contains_key(&pending_collation) { - gum::warn!( - target: LOG_TARGET, - peer_id = %pending_collation.peer_id, - %pending_collation.para_id, - ?pending_collation.relay_parent, - "collation has already been requested", - ); - return false 
+ return Err(FetchError::AlreadyRequested) } let PendingCollation { relay_parent, para_id, peer_id, prospective_candidate, .. } = pending_collation; - let per_relay_parent = match state.per_relay_parent.get_mut(&relay_parent) { - Some(state) => state, - None => { - gum::debug!( - target: LOG_TARGET, - peer_id = %peer_id, - para_id = %para_id, - relay_parent = %relay_parent, - "Collation relay parent is unknown", - ); - return false - }, - }; + let per_relay_parent = state + .per_relay_parent + .get_mut(&relay_parent) + .ok_or(FetchError::RelayParentOutOfView)?; // Relay parent mode is checked in `handle_advertisement`. let (requests, response_recv) = match (peer_protocol_version, prospective_candidate) { @@ -762,17 +733,7 @@ async fn request_collation( let requests = Requests::CollationFetchingVStaging(req); (requests, response_recv.boxed()) }, - _ => { - gum::warn!( - target: LOG_TARGET, - peer_id = %peer_id, - %para_id, - ?relay_parent, - ?peer_protocol_version, - "Peer's protocol doesn't match the advertisement", - ); - return false - }, + _ => return Err(FetchError::ProtocolMismatch), }; let per_request = PerRequest { @@ -804,7 +765,7 @@ async fn request_collation( IfDisconnected::ImmediateError, )) .await; - true + Ok(()) } /// Networking message has been received. @@ -1109,12 +1070,12 @@ where collations.waiting_queue.push_back((pending_collation, id)); }, CollationStatus::Waiting => { - fetch_collation(sender, state, pending_collation, id).await; + let _ = fetch_collation(sender, state, pending_collation, id).await; }, CollationStatus::Seconded if relay_parent_mode.is_enabled() => { // Limit is not reached, it's allowed to second another // collation. - fetch_collation(sender, state, pending_collation, id).await; + let _ = fetch_collation(sender, state, pending_collation, id).await; }, CollationStatus::Seconded => { gum::trace!( @@ -1569,7 +1530,7 @@ async fn dequeue_next_collation_and_fetch( // The collator we tried to fetch from last. 
previous_fetch: CollatorId, ) { - if let Some((next, id)) = state.per_relay_parent.get_mut(&relay_parent).and_then(|state| { + while let Some((next, id)) = state.per_relay_parent.get_mut(&relay_parent).and_then(|state| { state .collations .get_next_collation_to_fetch(Some(&previous_fetch), state.prospective_parachains_mode) @@ -1580,14 +1541,18 @@ async fn dequeue_next_collation_and_fetch( ?id, "Successfully dequeued next advertisement - fetching ..." ); - fetch_collation(ctx.sender(), state, next, id).await; - } else { - gum::debug!( - target: LOG_TARGET, - ?relay_parent, - previous_collator = ?previous_fetch, - "No collations are available to fetch" - ); + if let Err(err) = fetch_collation(ctx.sender(), state, next, id).await { + gum::debug!( + target: LOG_TARGET, + relay_parent = ?next.relay_parent, + para_id = ?next.para_id, + peer_id = ?next.peer_id, + error = %err, + "Failed to request a collation, dequeueing next one", + ); + } else { + break + } } } From d87bcf326ade230b5e49e9af90632a7cd09a5164 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Thu, 6 Oct 2022 18:31:08 +0300 Subject: [PATCH 40/45] Guard against dequeueing while already fetching --- .../src/validator_side/collation.rs | 27 ++++---- .../src/validator_side/mod.rs | 61 ++++++++++++++----- 2 files changed, 63 insertions(+), 25 deletions(-) diff --git a/node/network/collator-protocol/src/validator_side/collation.rs b/node/network/collator-protocol/src/validator_side/collation.rs index 23fe2554c6a3..c5cb848d2815 100644 --- a/node/network/collator-protocol/src/validator_side/collation.rs +++ b/node/network/collator-protocol/src/validator_side/collation.rs @@ -176,11 +176,11 @@ impl CollationStatus { pub struct Collations { /// What is the current status in regards to a collation for this relay parent? pub status: CollationStatus, - /// Collator we're fetching from. + /// Collator we're fetching from, optionally which candidate was requested. 
/// /// This is the currently last started fetch, which did not exceed `MAX_UNSHARED_DOWNLOAD_TIME` /// yet. - pub fetching_from: Option, + pub fetching_from: Option<(CollatorId, Option)>, /// Collation that were advertised to us, but we did not yet fetch. pub waiting_queue: VecDeque<(PendingCollation, CollatorId)>, /// How many collations have been seconded per parachain. @@ -201,19 +201,24 @@ impl Collations { /// the passed in `finished_one` is the currently `waiting_collation`. pub(super) fn get_next_collation_to_fetch( &mut self, - finished_one: Option<&CollatorId>, + finished_one: &(CollatorId, Option), relay_parent_mode: ProspectiveParachainsMode, ) -> Option<(PendingCollation, CollatorId)> { // If finished one does not match waiting_collation, then we already dequeued another fetch // to replace it. - if self.fetching_from.as_ref() != finished_one { - gum::trace!( - target: LOG_TARGET, - waiting_collation = ?self.fetching_from, - ?finished_one, - "Not proceeding to the next collation - has already been done." - ); - return None + if let Some((collator_id, maybe_candidate_hash)) = self.fetching_from.as_ref() { + // If a candidate hash was saved previously, `finished_one` must include this too. + if collator_id != &finished_one.0 && + maybe_candidate_hash.map_or(true, |hash| Some(&hash) != finished_one.1.as_ref()) + { + gum::trace!( + target: LOG_TARGET, + waiting_collation = ?self.fetching_from, + ?finished_one, + "Not proceeding to the next collation - has already been done." 
+ ); + return None + } } self.status.back_to_waiting(relay_parent_mode); diff --git a/node/network/collator-protocol/src/validator_side/mod.rs b/node/network/collator-protocol/src/validator_side/mod.rs index 28d4ad6beba4..ee685e84487a 100644 --- a/node/network/collator-protocol/src/validator_side/mod.rs +++ b/node/network/collator-protocol/src/validator_side/mod.rs @@ -453,7 +453,8 @@ struct State { /// /// A triggering timer means that the fetching took too long for our taste and we should give /// another collator the chance to be faster (dequeue next fetch request as well). - collation_fetch_timeouts: FuturesUnordered>, + collation_fetch_timeouts: + FuturesUnordered, Hash)>>, /// Collations that we have successfully requested from peers and waiting /// on validation. @@ -605,11 +606,13 @@ async fn fetch_collation( if peer_data.has_advertised(&relay_parent, candidate_hash) { request_collation(sender, state, pc, id.clone(), peer_data.version, tx).await?; - let timeout = |collator_id, relay_parent| async move { + let timeout = |collator_id, candidate_hash, relay_parent| async move { Delay::new(MAX_UNSHARED_DOWNLOAD_TIME).await; - (collator_id, relay_parent) + (collator_id, candidate_hash, relay_parent) }; - state.collation_fetch_timeouts.push(timeout(id.clone(), relay_parent).boxed()); + state + .collation_fetch_timeouts + .push(timeout(id.clone(), candidate_hash, relay_parent).boxed()); state.collation_fetches.push(rx.map(move |r| ((id, pc), r)).boxed()); Ok(()) @@ -756,8 +759,13 @@ async fn request_collation( "Requesting collation", ); + let maybe_candidate_hash = + prospective_candidate.as_ref().map(ProspectiveCandidate::candidate_hash); per_relay_parent.collations.status = CollationStatus::Fetching; - per_relay_parent.collations.fetching_from.replace(collator_id); + per_relay_parent + .collations + .fetching_from + .replace((collator_id, maybe_candidate_hash)); sender .send_message(NetworkBridgeTxMessage::SendRequests( @@ -1343,7 +1351,9 @@ async fn 
process_msg( let fetched_collation = FetchedCollation::from(&receipt.to_plain()); if let Some(collation_event) = state.fetched_candidates.remove(&fetched_collation) { let (collator_id, pending_collation) = collation_event; - let PendingCollation { relay_parent, peer_id, para_id, .. } = pending_collation; + let PendingCollation { + relay_parent, peer_id, para_id, prospective_candidate, .. + } = pending_collation; note_good_collation(ctx.sender(), &state.peer_data, collator_id.clone()).await; if let Some(peer_data) = state.peer_data.get(&peer_id) { notify_collation_seconded( @@ -1361,7 +1371,15 @@ async fn process_msg( state.collations.note_seconded(para_id); } // If async backing is enabled, make an attempt to fetch next collation. - dequeue_next_collation_and_fetch(ctx, state, parent, collator_id).await; + let maybe_candidate_hash = + prospective_candidate.as_ref().map(ProspectiveCandidate::candidate_hash); + dequeue_next_collation_and_fetch( + ctx, + state, + parent, + (collator_id, maybe_candidate_hash), + ) + .await; } else { gum::debug!( target: LOG_TARGET, @@ -1372,6 +1390,7 @@ async fn process_msg( }, Invalid(parent, candidate_receipt) => { let fetched_collation = FetchedCollation::from(&candidate_receipt); + let candidate_hash = fetched_collation.candidate_hash; let id = match state.fetched_candidates.entry(fetched_collation) { Entry::Occupied(entry) if entry.get().1.commitments_hash == @@ -1391,7 +1410,7 @@ async fn process_msg( report_collator(ctx.sender(), &state.peer_data, id.clone()).await; - dequeue_next_collation_and_fetch(ctx, state, parent, id).await; + dequeue_next_collation_and_fetch(ctx, state, parent, (id, Some(candidate_hash))).await; }, } } @@ -1467,18 +1486,32 @@ pub(crate) async fn run( // Report malicious peer. 
modify_reputation(ctx.sender(), pc.peer_id, COST_REPORT_BAD).await; } - dequeue_next_collation_and_fetch(&mut ctx, &mut state, pc.relay_parent, collator_id).await; + let maybe_candidate_hash = + pc.prospective_candidate.as_ref().map(ProspectiveCandidate::candidate_hash); + dequeue_next_collation_and_fetch( + &mut ctx, + &mut state, + pc.relay_parent, + (collator_id, maybe_candidate_hash), + ) + .await; } } res = state.collation_fetch_timeouts.select_next_some() => { - let (collator_id, relay_parent) = res; + let (collator_id, maybe_candidate_hash, relay_parent) = res; gum::debug!( target: LOG_TARGET, ?relay_parent, ?collator_id, "Timeout hit - already seconded?" ); - dequeue_next_collation_and_fetch(&mut ctx, &mut state, relay_parent, collator_id).await; + dequeue_next_collation_and_fetch( + &mut ctx, + &mut state, + relay_parent, + (collator_id, maybe_candidate_hash), + ) + .await; } _ = check_collations_stream.next() => { let reputation_changes = poll_requests( @@ -1527,13 +1560,13 @@ async fn dequeue_next_collation_and_fetch( ctx: &mut Context, state: &mut State, relay_parent: Hash, - // The collator we tried to fetch from last. - previous_fetch: CollatorId, + // The collator we tried to fetch from last, optionally which candidate. 
+ previous_fetch: (CollatorId, Option), ) { while let Some((next, id)) = state.per_relay_parent.get_mut(&relay_parent).and_then(|state| { state .collations - .get_next_collation_to_fetch(Some(&previous_fetch), state.prospective_parachains_mode) + .get_next_collation_to_fetch(&previous_fetch, state.prospective_parachains_mode) }) { gum::debug!( target: LOG_TARGET, From 61195c2a7aa50b7283d31371a772a0672f75b13b Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Sat, 8 Oct 2022 12:59:43 +0300 Subject: [PATCH 41/45] Reintegrate connection management --- .../src/collator_side/mod.rs | 110 +++++++++++++++--- .../src/collator_side/tests/mod.rs | 3 + .../src/collator_side/validators_buffer.rs | 46 ++++---- node/subsystem-test-helpers/src/lib.rs | 31 ++++- 4 files changed, 145 insertions(+), 45 deletions(-) diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index 6e63c21c9630..898f7f2701f9 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -21,7 +21,7 @@ use std::{ }; use bitvec::{bitvec, vec::BitVec}; -use futures::{channel::oneshot, pin_mut, select, FutureExt, StreamExt}; +use futures::{channel::oneshot, future::Fuse, pin_mut, select, FutureExt, StreamExt}; use sp_core::Pair; use polkadot_node_network_protocol::{ @@ -49,7 +49,7 @@ use polkadot_node_subsystem_util::{ }; use polkadot_primitives::v2::{ AuthorityDiscoveryId, CandidateHash, CandidateReceipt, CollatorPair, CoreIndex, CoreState, - Hash, Id as ParaId, + GroupIndex, Hash, Id as ParaId, SessionIndex, }; use super::{ @@ -64,11 +64,13 @@ mod collation; mod metrics; #[cfg(test)] mod tests; +mod validators_buffer; use collation::{ ActiveCollationFetches, Collation, CollationSendResult, CollationStatus, VersionedCollationRequest, WaitingCollationFetches, }; +use validators_buffer::{ValidatorGroupsBuffer, VALIDATORS_BUFFER_CAPACITY}; pub use metrics::Metrics; @@ 
-85,6 +87,21 @@ const COST_APPARENT_FLOOD: Rep = /// For considerations on this value, see: https://github.com/paritytech/polkadot/issues/4386 const MAX_UNSHARED_UPLOAD_TIME: Duration = Duration::from_millis(150); +/// Ensure that collator issues a connection request at least once every this many seconds. +/// Usually it's done when advertising new collation. However, if the core stays occupied or +/// it's not our turn to produce a candidate, it's important to disconnect from previous +/// peers. +/// +/// Validators are obtained from [`ValidatorGroupsBuffer::validators_to_connect`]. +const RECONNECT_TIMEOUT: Duration = Duration::from_secs(12); + +/// Future that when resolved indicates that we should update reserved peer-set +/// of validators we want to be connected to. +/// +/// `Pending` variant never finishes and should be used when there're no peers +/// connected. +type ReconnectTimeout = Fuse; + /// Info about validators we are currently connected to. /// /// It keeps track to which validators we advertised our collation. @@ -229,6 +246,13 @@ struct State { /// by `PeerConnected` events. peer_ids: HashMap>, + /// Tracks which validators we want to stay connected to. + validator_groups_buf: ValidatorGroupsBuffer, + + /// Timeout-future that enforces collator to update the peer-set at least once + /// every [`RECONNECT_TIMEOUT`] seconds. + reconnect_timeout: ReconnectTimeout, + /// Metrics. metrics: Metrics, @@ -260,6 +284,8 @@ impl State { span_per_relay_parent: Default::default(), collation_result_senders: Default::default(), peer_ids: Default::default(), + validator_groups_buf: ValidatorGroupsBuffer::with_capacity(VALIDATORS_BUFFER_CAPACITY), + reconnect_timeout: Fuse::terminated(), waiting_collation_fetches: Default::default(), active_collation_fetches: Default::default(), } @@ -350,10 +376,10 @@ async fn distribute_collation( // // When prospective parachains are disabled, candidate relay parent here is // guaranteed to be an active leaf. 
- let current_validators = + let GroupValidators { validators, session_index, group_index } = determine_our_validators(ctx, runtime, our_core, num_cores, candidate_relay_parent).await?; - if current_validators.validators.is_empty() { + if validators.is_empty() { gum::warn!( target: LOG_TARGET, core = ?our_core, @@ -363,28 +389,37 @@ async fn distribute_collation( return Ok(()) } + // It's important to insert new collation interests **before** + // issuing a connection request. + // + // If a validator managed to fetch all the relevant collations + // but still assigned to our core, we keep the connection alive. + state.validator_groups_buf.note_collation_advertised( + candidate_hash, + session_index, + group_index, + &validators, + ); + gum::debug!( target: LOG_TARGET, para_id = %id, candidate_relay_parent = %candidate_relay_parent, relay_parent_mode = ?relay_parent_mode, - candidate_hash = ?candidate_hash, + ?candidate_hash, pov_hash = ?pov.hash(), core = ?our_core, - ?current_validators, + current_validators = ?validators, "Accepted collation, connecting to validators." ); let validators_at_relay_parent = &mut per_relay_parent.validator_group.validators; if validators_at_relay_parent.is_empty() { - *validators_at_relay_parent = current_validators.validators.clone(); + *validators_at_relay_parent = validators; } - // Issue a discovery request for the validators of the current group: - // - // TODO [now]: some kind of connection management is necessary to avoid - // dropping peers from e.g. implicit view assignments. - connect_to_validators(ctx, current_validators.validators.into_iter().collect()).await; + // Update a set of connected validators if necessary. 
+ state.reconnect_timeout = connect_to_validators(ctx, &state.validator_groups_buf).await; if let Some(result_sender) = result_sender { state.collation_result_senders.insert(candidate_hash, result_sender); @@ -469,6 +504,9 @@ async fn determine_core( struct GroupValidators { /// The validators of above group (their discovery keys). validators: Vec, + + session_index: SessionIndex, + group_index: GroupIndex, } /// Figure out current group of validators assigned to the para being collated on. @@ -502,7 +540,11 @@ async fn determine_our_validators( let current_validators = current_validators.iter().map(|i| validators[i.0 as usize].clone()).collect(); - let current_validators = GroupValidators { validators: current_validators }; + let current_validators = GroupValidators { + validators: current_validators, + session_index, + group_index: current_group_index, + }; Ok(current_validators) } @@ -554,13 +596,18 @@ async fn declare( } } -/// Issue a connection request to a set of validators and -/// revoke the previous connection request. +/// Updates a set of connected validators based on their advertisement-bits +/// in a validators buffer. +/// +/// Should be called again once a returned future resolves. #[overseer::contextbounds(CollatorProtocol, prefix = self::overseer)] async fn connect_to_validators( ctx: &mut Context, - validator_ids: Vec, -) { + validator_groups_buf: &ValidatorGroupsBuffer, +) -> ReconnectTimeout { + let validator_ids = validator_groups_buf.validators_to_connect(); + let is_disconnect = validator_ids.is_empty(); + // ignore address resolution failure // will reissue a new request on new collation let (failed, _) = oneshot::channel(); @@ -570,6 +617,14 @@ async fn connect_to_validators( failed, }) .await; + + if is_disconnect { + gum::trace!(target: LOG_TARGET, "Disconnecting from all peers"); + // Never resolves. + Fuse::terminated() + } else { + futures_timer::Delay::new(RECONNECT_TIMEOUT).fuse() + } } /// Advertise collation to the given `peer`. 
@@ -1158,7 +1213,9 @@ where .map(|per_relay_parent| per_relay_parent.collations) .unwrap_or_default(); for collation in collations.into_values() { - state.collation_result_senders.remove(&collation.receipt.hash()); + let candidate_hash = collation.receipt.hash(); + state.collation_result_senders.remove(&candidate_hash); + state.validator_groups_buf.remove_candidate(&candidate_hash); match collation.status { CollationStatus::Created => gum::warn!( @@ -1210,6 +1267,7 @@ pub(crate) async fn run( pin_mut!(recv_req_v1); pin_mut!(recv_req_v2); + let mut reconnect_timeout = &mut state.reconnect_timeout; select! { msg = ctx.recv().fuse() => match msg.map_err(FatalError::SubsystemReceive)? { FromOrchestra::Communication { msg } => { @@ -1238,6 +1296,10 @@ pub(crate) async fn run( // timeout, we simply start processing next request. // The request it still alive, it should be kept in a waiting queue. } else { + for authority_id in state.peer_ids.get(&peer_id).into_iter().flatten() { + // Timeout not hit, this peer is no longer interested in this relay parent. 
+ state.validator_groups_buf.reset_validator_interest(candidate_hash, authority_id); + } waiting.waiting_peers.remove(&(peer_id, candidate_hash)); } @@ -1278,7 +1340,17 @@ pub(crate) async fn run( send_collation(&mut state, next, receipt, pov).await; } - } + }, + _ = reconnect_timeout => { + state.reconnect_timeout = + connect_to_validators(&mut ctx, &state.validator_groups_buf).await; + + gum::trace!( + target: LOG_TARGET, + timeout = ?RECONNECT_TIMEOUT, + "Peer-set updated due to a timeout" + ); + }, in_req = recv_req_v1 => { let request = in_req.map(VersionedCollationRequest::from); diff --git a/node/network/collator-protocol/src/collator_side/tests/mod.rs b/node/network/collator-protocol/src/collator_side/tests/mod.rs index 904babc2850e..032d1782812e 100644 --- a/node/network/collator-protocol/src/collator_side/tests/mod.rs +++ b/node/network/collator-protocol/src/collator_side/tests/mod.rs @@ -1294,6 +1294,9 @@ fn connect_to_buffered_groups() { } ); + // Let the subsystem process the collation event. + test_helpers::Yield::new().await; + test_state.advance_to_new_round(&mut virtual_overseer, true).await; test_state.group_rotation_info = test_state.group_rotation_info.bump_rotation(); diff --git a/node/network/collator-protocol/src/collator_side/validators_buffer.rs b/node/network/collator-protocol/src/collator_side/validators_buffer.rs index 5bb31c72d6c5..196e1af0ff9f 100644 --- a/node/network/collator-protocol/src/collator_side/validators_buffer.rs +++ b/node/network/collator-protocol/src/collator_side/validators_buffer.rs @@ -23,8 +23,8 @@ //! We keep a simple FIFO buffer of N validator groups and a bitvec for each advertisement, //! 1 indicating we want to be connected to i-th validator in a buffer, 0 otherwise. //! -//! The bit is set to 1 for the whole **group** whenever it's inserted into the buffer. Given a relay -//! parent, one can reset a bit back to 0 for particular **validator**. For example, if a collation +//! 
The bit is set to 1 for the whole **group** whenever it's inserted into the buffer. Given a candidate +//! hash, one can reset a bit back to 0 for particular **validator**. For example, if a collation //! was fetched or some timeout has been hit. //! //! The bitwise OR over known advertisements gives us validators indices for connection request. @@ -37,7 +37,7 @@ use std::{ use bitvec::{bitvec, vec::BitVec}; -use polkadot_primitives::v2::{AuthorityDiscoveryId, GroupIndex, Hash, SessionIndex}; +use polkadot_primitives::v2::{AuthorityDiscoveryId, CandidateHash, GroupIndex, SessionIndex}; /// The ring buffer stores at most this many unique validator groups. /// @@ -66,9 +66,9 @@ pub struct ValidatorGroupsBuffer { group_infos: VecDeque, /// Continuous buffer of validators discovery keys. validators: VecDeque, - /// Mapping from relay-parent to bit-vectors with bits for all `validators`. + /// Mapping from candidate hashes to bit-vectors with bits for all `validators`. /// Invariants kept: All bit-vectors are guaranteed to have the same size. - should_be_connected: HashMap, + should_be_connected: HashMap, /// Buffer capacity, limits the number of **groups** tracked. cap: NonZeroUsize, } @@ -107,7 +107,7 @@ impl ValidatorGroupsBuffer { /// of the buffer. pub fn note_collation_advertised( &mut self, - relay_parent: Hash, + candidate_hash: CandidateHash, session_index: SessionIndex, group_index: GroupIndex, validators: &[AuthorityDiscoveryId], @@ -121,19 +121,19 @@ impl ValidatorGroupsBuffer { }) { Some((idx, group)) => { let group_start_idx = self.group_lengths_iter().take(idx).sum(); - self.set_bits(relay_parent, group_start_idx..(group_start_idx + group.len)); + self.set_bits(candidate_hash, group_start_idx..(group_start_idx + group.len)); }, - None => self.push(relay_parent, session_index, group_index, validators), + None => self.push(candidate_hash, session_index, group_index, validators), } } /// Note that a validator is no longer interested in a given relay parent. 
pub fn reset_validator_interest( &mut self, - relay_parent: Hash, + candidate_hash: CandidateHash, authority_id: &AuthorityDiscoveryId, ) { - let bits = match self.should_be_connected.get_mut(&relay_parent) { + let bits = match self.should_be_connected.get_mut(&candidate_hash) { Some(bits) => bits, None => return, }; @@ -145,17 +145,12 @@ impl ValidatorGroupsBuffer { } } - /// Remove relay parent from the buffer. + /// Remove advertised candidate from the buffer. /// /// The buffer will no longer track which validators are interested in a corresponding /// advertisement. - pub fn remove_relay_parent(&mut self, relay_parent: &Hash) { - self.should_be_connected.remove(relay_parent); - } - - /// Removes all advertisements from the buffer. - pub fn clear_advertisements(&mut self) { - self.should_be_connected.clear(); + pub fn remove_candidate(&mut self, candidate_hash: &CandidateHash) { + self.should_be_connected.remove(candidate_hash); } /// Pushes a new group to the buffer along with advertisement, setting all validators @@ -164,7 +159,7 @@ impl ValidatorGroupsBuffer { /// If the buffer is full, drops group from the tail. fn push( &mut self, - relay_parent: Hash, + candidate_hash: CandidateHash, session_index: SessionIndex, group_index: GroupIndex, validators: &[AuthorityDiscoveryId], @@ -193,17 +188,17 @@ impl ValidatorGroupsBuffer { self.should_be_connected .values_mut() .for_each(|bits| bits.resize(new_len, false)); - self.set_bits(relay_parent, group_start_idx..(group_start_idx + validators.len())); + self.set_bits(candidate_hash, group_start_idx..(group_start_idx + validators.len())); } /// Sets advertisement bits to 1 in a given range (usually corresponding to some group). /// If the relay parent is unknown, inserts 0-initialized bitvec first. /// /// The range must be ensured to be within bounds. 
- fn set_bits(&mut self, relay_parent: Hash, range: Range) { + fn set_bits(&mut self, candidate_hash: CandidateHash, range: Range) { let bits = self .should_be_connected - .entry(relay_parent) + .entry(candidate_hash) .or_insert_with(|| bitvec![0; self.validators.len()]); bits[range].fill(true); @@ -220,6 +215,7 @@ impl ValidatorGroupsBuffer { #[cfg(test)] mod tests { use super::*; + use polkadot_primitives::v2::Hash; use sp_keyring::Sr25519Keyring; #[test] @@ -227,8 +223,8 @@ mod tests { let cap = NonZeroUsize::new(1).unwrap(); let mut buf = ValidatorGroupsBuffer::with_capacity(cap); - let hash_a = Hash::repeat_byte(0x1); - let hash_b = Hash::repeat_byte(0x2); + let hash_a = CandidateHash(Hash::repeat_byte(0x1)); + let hash_b = CandidateHash(Hash::repeat_byte(0x2)); let validators: Vec<_> = [ Sr25519Keyring::Alice, @@ -263,7 +259,7 @@ mod tests { let cap = NonZeroUsize::new(3).unwrap(); let mut buf = ValidatorGroupsBuffer::with_capacity(cap); - let hashes: Vec<_> = (0..5).map(Hash::repeat_byte).collect(); + let hashes: Vec<_> = (0..5).map(|i| CandidateHash(Hash::repeat_byte(i))).collect(); let validators: Vec<_> = [ Sr25519Keyring::Alice, diff --git a/node/subsystem-test-helpers/src/lib.rs b/node/subsystem-test-helpers/src/lib.rs index e2e61c2006d8..98a2840e3bda 100644 --- a/node/subsystem-test-helpers/src/lib.rs +++ b/node/subsystem-test-helpers/src/lib.rs @@ -33,7 +33,7 @@ use std::{ pin::Pin, sync::Arc, task::{Context, Poll, Waker}, - time::Duration, + time::Duration, future::Future }; /// Generally useful mock data providers for unit tests. @@ -391,6 +391,35 @@ macro_rules! arbitrary_order { }; } +/// Future that yields the execution once and resolves +/// immediately after. +/// +/// Useful when one wants to poll the background task to completion +/// before sending messages to it in order to avoid races. +pub struct Yield(bool); + +impl Yield { + /// Returns new `Yield` future. 
+ pub fn new() -> Self { + Self(false) + } +} + +impl Future for Yield { + type Output = (); + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + if !self.0 { + self.0 = true; + cx.waker().wake_by_ref(); + Poll::Pending + } else { + Poll::Ready(()) + } + } +} + + #[cfg(test)] mod tests { use super::*; From ef251276791e696a4d161cd514f1d9c121841eb0 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Sat, 8 Oct 2022 17:28:01 +0300 Subject: [PATCH 42/45] Timeout on advertisements --- .../src/collator_side/mod.rs | 37 ++++++++++++++++++- .../src/collator_side/validators_buffer.rs | 36 ++++++++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/node/network/collator-protocol/src/collator_side/mod.rs b/node/network/collator-protocol/src/collator_side/mod.rs index 898f7f2701f9..79222fde117c 100644 --- a/node/network/collator-protocol/src/collator_side/mod.rs +++ b/node/network/collator-protocol/src/collator_side/mod.rs @@ -21,7 +21,9 @@ use std::{ }; use bitvec::{bitvec, vec::BitVec}; -use futures::{channel::oneshot, future::Fuse, pin_mut, select, FutureExt, StreamExt}; +use futures::{ + channel::oneshot, future::Fuse, pin_mut, select, stream::FuturesUnordered, FutureExt, StreamExt, +}; use sp_core::Pair; use polkadot_node_network_protocol::{ @@ -70,7 +72,9 @@ use collation::{ ActiveCollationFetches, Collation, CollationSendResult, CollationStatus, VersionedCollationRequest, WaitingCollationFetches, }; -use validators_buffer::{ValidatorGroupsBuffer, VALIDATORS_BUFFER_CAPACITY}; +use validators_buffer::{ + ResetInterestTimeout, ValidatorGroupsBuffer, RESET_INTEREST_TIMEOUT, VALIDATORS_BUFFER_CAPACITY, +}; pub use metrics::Metrics; @@ -266,6 +270,14 @@ struct State { /// /// Each future returns the relay parent of the finished collation fetch. active_collation_fetches: ActiveCollationFetches, + + /// Time limits for validators to fetch the collation once the advertisement + /// was sent. 
+ /// + /// Given an implicit view a collation may stay in memory for significant amount + /// of time, if we don't timeout validators the node will keep attempting to connect + /// to unneeded peers. + advertisement_timeouts: FuturesUnordered, } impl State { @@ -288,6 +300,7 @@ impl State { reconnect_timeout: Fuse::terminated(), waiting_collation_fetches: Default::default(), active_collation_fetches: Default::default(), + advertisement_timeouts: Default::default(), } } } @@ -459,6 +472,7 @@ async fn distribute_collation( peer_id, peer_data.version, &state.peer_ids, + &mut state.advertisement_timeouts, &state.metrics, ) .await; @@ -641,6 +655,7 @@ async fn advertise_collation( peer: &PeerId, protocol_version: CollationVersion, peer_ids: &HashMap>, + advertisement_timeouts: &mut FuturesUnordered, metrics: &Metrics, ) { for (candidate_hash, collation) in per_relay_parent.collations.iter_mut() { @@ -708,6 +723,12 @@ async fn advertise_collation( .validator_group .advertised_to_peer(candidate_hash, &peer_ids, peer); + advertisement_timeouts.push(ResetInterestTimeout::new( + *candidate_hash, + *peer, + RESET_INTEREST_TIMEOUT, + )); + metrics.on_advertisement_made(); } } @@ -1070,6 +1091,7 @@ async fn handle_peer_view_change( &peer_id, *version, &state.peer_ids, + &mut state.advertisement_timeouts, &state.metrics, ) .await; @@ -1341,6 +1363,17 @@ pub(crate) async fn run( send_collation(&mut state, next, receipt, pov).await; } }, + (candidate_hash, peer_id) = state.advertisement_timeouts.select_next_some() => { + // NOTE: it doesn't necessarily mean that a validator gets disconnected, + // it only will if there're no other advertisements we want to send. + // + // No-op if the collation was already fetched or went out of view. 
+ for authority_id in state.peer_ids.get(&peer_id).into_iter().flatten() { + state + .validator_groups_buf + .reset_validator_interest(candidate_hash, &authority_id); + } + } _ = reconnect_timeout => { state.reconnect_timeout = connect_to_validators(&mut ctx, &state.validator_groups_buf).await; diff --git a/node/network/collator-protocol/src/collator_side/validators_buffer.rs b/node/network/collator-protocol/src/collator_side/validators_buffer.rs index 196e1af0ff9f..9f1817aa2051 100644 --- a/node/network/collator-protocol/src/collator_side/validators_buffer.rs +++ b/node/network/collator-protocol/src/collator_side/validators_buffer.rs @@ -31,12 +31,18 @@ use std::{ collections::{HashMap, VecDeque}, + future::Future, num::NonZeroUsize, ops::Range, + pin::Pin, + task::{Context, Poll}, + time::Duration, }; use bitvec::{bitvec, vec::BitVec}; +use futures::FutureExt; +use polkadot_node_network_protocol::PeerId; use polkadot_primitives::v2::{AuthorityDiscoveryId, CandidateHash, GroupIndex, SessionIndex}; /// The ring buffer stores at most this many unique validator groups. @@ -212,6 +218,36 @@ impl ValidatorGroupsBuffer { } } +/// A timeout for resetting validators' interests in collations. +pub const RESET_INTEREST_TIMEOUT: Duration = Duration::from_secs(6); + +/// A future that returns a candidate hash along with the peer id of a +/// validator once the timeout is hit. +/// +/// If a validator doesn't manage to fetch a collation within this timeout +/// we should reset its interest in this advertisement in a buffer. For example, +/// when the PoV was already requested from another peer. +pub struct ResetInterestTimeout { + fut: futures_timer::Delay, + candidate_hash: CandidateHash, + peer_id: PeerId, +} + +impl ResetInterestTimeout { + /// Returns new `ResetInterestTimeout` that resolves after given timeout. 
+ pub fn new(candidate_hash: CandidateHash, peer_id: PeerId, delay: Duration) -> Self { + Self { fut: futures_timer::Delay::new(delay), candidate_hash, peer_id } + } +} + +impl Future for ResetInterestTimeout { + type Output = (CandidateHash, PeerId); + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + self.fut.poll_unpin(cx).map(|_| (self.candidate_hash, self.peer_id)) + } +} + #[cfg(test)] mod tests { use super::*; From 1323e706be6d8694294f74bc15e7818418a5a025 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Wed, 12 Oct 2022 13:57:23 +0300 Subject: [PATCH 43/45] fmt --- node/subsystem-test-helpers/src/lib.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/node/subsystem-test-helpers/src/lib.rs b/node/subsystem-test-helpers/src/lib.rs index 98a2840e3bda..79f833b7558c 100644 --- a/node/subsystem-test-helpers/src/lib.rs +++ b/node/subsystem-test-helpers/src/lib.rs @@ -30,10 +30,11 @@ use sp_core::testing::TaskExecutor; use std::{ convert::Infallible, + future::Future, pin::Pin, sync::Arc, task::{Context, Poll, Waker}, - time::Duration, future::Future + time::Duration, }; /// Generally useful mock data providers for unit tests. 
@@ -406,20 +407,19 @@ impl Yield { } impl Future for Yield { - type Output = (); - - fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - if !self.0 { - self.0 = true; - cx.waker().wake_by_ref(); - Poll::Pending - } else { - Poll::Ready(()) - } - } + type Output = (); + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + if !self.0 { + self.0 = true; + cx.waker().wake_by_ref(); + Poll::Pending + } else { + Poll::Ready(()) + } + } } - #[cfg(test)] mod tests { use super::*; From 409483b097dadab89d4889692d5b2924cb12f55f Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Wed, 12 Oct 2022 15:14:12 +0300 Subject: [PATCH 44/45] spellcheck --- node/subsystem-types/src/messages.rs | 2 +- scripts/ci/gitlab/lingua.dic | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/node/subsystem-types/src/messages.rs b/node/subsystem-types/src/messages.rs index a6a72d8958c2..e3c03ee11c05 100644 --- a/node/subsystem-types/src/messages.rs +++ b/node/subsystem-types/src/messages.rs @@ -725,7 +725,7 @@ impl RuntimeApiRequest { /// `Disputes` pub const DISPUTES_RUNTIME_REQUIREMENT: u32 = 3; - /// Minimum version for valididty constraints, required for async backing. + /// Minimum version for validity constraints, required for async backing. /// /// 99 for now, should be adjusted to VSTAGING/actual runtime version once released. 
pub const VALIDITY_CONSTRAINTS: u32 = 99; diff --git a/scripts/ci/gitlab/lingua.dic b/scripts/ci/gitlab/lingua.dic index 3add6a276cf0..b68a8c20600a 100644 --- a/scripts/ci/gitlab/lingua.dic +++ b/scripts/ci/gitlab/lingua.dic @@ -298,6 +298,7 @@ unreserve unreserving unroutable unservable/B +unshare/D untrusted untyped unvested @@ -314,10 +315,11 @@ verify/R versa Versi version/DMSG -versioned VMP/SM VPS VRF/SM +vstaging +VStaging w3f/MS wakeup wakeups From c194b8e96e690e4c60547c3b9d059f6fdb5b1478 Mon Sep 17 00:00:00 2001 From: Chris Sosnin Date: Wed, 12 Oct 2022 16:06:22 +0300 Subject: [PATCH 45/45] update tests after merge --- .../src/validator_side/tests/mod.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/node/network/collator-protocol/src/validator_side/tests/mod.rs b/node/network/collator-protocol/src/validator_side/tests/mod.rs index d5d88128ea54..ffc8796d3450 100644 --- a/node/network/collator-protocol/src/validator_side/tests/mod.rs +++ b/node/network/collator-protocol/src/validator_side/tests/mod.rs @@ -765,20 +765,24 @@ fn fetches_next_collation() { let test_state = TestState::default(); test_harness(|test_harness| async move { - let TestHarness { mut virtual_overseer } = test_harness; + let TestHarness { mut virtual_overseer, .. } = test_harness; let second = Hash::random(); + let our_view = our_view![test_state.relay_parent, second]; + overseer_send( &mut virtual_overseer, CollatorProtocolMessage::NetworkBridgeUpdate(NetworkBridgeEvent::OurViewChange( - our_view![test_state.relay_parent, second], + our_view.clone(), )), ) .await; - respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; - respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; + for hash in our_view.iter() { + assert_runtime_version_request(&mut virtual_overseer, *hash).await; + respond_to_core_info_queries(&mut virtual_overseer, &test_state).await; + } let peer_b = PeerId::random(); let peer_c = PeerId::random();