Skip to content

Commit 2a87016

Browse files
authored
Fix lookup disconnect peer (#5815)
* Test lookup peer disconnect modes
* Fix lookup peer disconnected return early
1 parent b5de925 commit 2a87016

3 files changed

Lines changed: 59 additions & 83 deletions

File tree

beacon_node/network/src/sync/block_lookups/mod.rs

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
191191
.iter()
192192
.find(|(_, l)| l.block_root() == block_to_drop)
193193
{
194-
for &peer_id in lookup.all_used_peers() {
194+
for &peer_id in lookup.all_peers() {
195195
cx.report_peer(
196196
peer_id,
197197
PeerAction::LowToleranceError,
@@ -387,8 +387,15 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
387387

388388
pub fn peer_disconnected(&mut self, peer_id: &PeerId) {
389389
self.single_block_lookups.retain(|_, lookup| {
390-
if lookup.remove_peer(peer_id) {
391-
debug!(self.log, "Dropping single lookup after peer disconnection"; "block_root" => ?lookup.block_root());
390+
lookup.remove_peer(peer_id);
391+
392+
// Note: this condition should be removed in the future. It's not strictly necessary to drop a
393+
// lookup if there are no peers left. A lookup should only be dropped if it cannot make progress.
394+
if lookup.has_no_peers() {
395+
debug!(self.log,
396+
"Dropping single lookup after peer disconnection";
397+
"block_root" => ?lookup.block_root()
398+
);
392399
false
393400
} else {
394401
true
@@ -545,7 +552,7 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
545552
lookup.continue_requests(cx)
546553
}
547554
Action::ParentUnknown { parent_root } => {
548-
let peers = lookup.all_available_peers().cloned().collect::<Vec<_>>();
555+
let peers = lookup.all_peers().copied().collect::<Vec<_>>();
549556
lookup.set_awaiting_parent(parent_root);
550557
debug!(self.log, "Marking lookup as awaiting parent"; "id" => lookup.id, "block_root" => ?block_root, "parent_root" => ?parent_root);
551558
self.search_parent_of_child(parent_root, block_root, &peers, cx);

beacon_node/network/src/sync/block_lookups/single_block_lookup.rs

Lines changed: 29 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ use crate::sync::network_context::{
77
};
88
use beacon_chain::BeaconChainTypes;
99
use derivative::Derivative;
10-
use itertools::Itertools;
1110
use rand::seq::IteratorRandom;
1211
use std::collections::HashSet;
1312
use std::fmt::Debug;
@@ -64,6 +63,9 @@ pub struct SingleBlockLookup<T: BeaconChainTypes> {
6463
pub id: Id,
6564
pub block_request_state: BlockRequestState<T::EthSpec>,
6665
pub blob_request_state: BlobRequestState<T::EthSpec>,
66+
/// Peers that claim to have imported this set of block components
67+
#[derivative(Debug(format_with = "fmt_peer_set_as_len"))]
68+
peers: HashSet<PeerId>,
6769
block_root: Hash256,
6870
awaiting_parent: Option<Hash256>,
6971
created: Instant,
@@ -78,8 +80,9 @@ impl<T: BeaconChainTypes> SingleBlockLookup<T> {
7880
) -> Self {
7981
Self {
8082
id,
81-
block_request_state: BlockRequestState::new(requested_block_root, peers),
82-
blob_request_state: BlobRequestState::new(requested_block_root, peers),
83+
block_request_state: BlockRequestState::new(requested_block_root),
84+
blob_request_state: BlobRequestState::new(requested_block_root),
85+
peers: HashSet::from_iter(peers.iter().copied()),
8386
block_root: requested_block_root,
8487
awaiting_parent,
8588
created: Instant::now(),
@@ -134,22 +137,9 @@ impl<T: BeaconChainTypes> SingleBlockLookup<T> {
134137
self.block_root() == block_root
135138
}
136139

137-
/// Get all unique used peers across block and blob requests.
138-
pub fn all_used_peers(&self) -> impl Iterator<Item = &PeerId> + '_ {
139-
self.block_request_state
140-
.state
141-
.get_used_peers()
142-
.chain(self.blob_request_state.state.get_used_peers())
143-
.unique()
144-
}
145-
146-
/// Get all unique available peers across block and blob requests.
147-
pub fn all_available_peers(&self) -> impl Iterator<Item = &PeerId> + '_ {
148-
self.block_request_state
149-
.state
150-
.get_available_peers()
151-
.chain(self.blob_request_state.state.get_available_peers())
152-
.unique()
140+
/// Get all unique peers that claim to have imported this set of block components
141+
pub fn all_peers(&self) -> impl Iterator<Item = &PeerId> + '_ {
142+
self.peers.iter()
153143
}
154144

155145
/// Makes progress on all requests of this lookup. Any error is not recoverable and must result
@@ -198,7 +188,7 @@ impl<T: BeaconChainTypes> SingleBlockLookup<T> {
198188
return Err(LookupRequestError::TooManyAttempts { cannot_process });
199189
}
200190

201-
let Some(peer_id) = request.get_state_mut().use_rand_available_peer() else {
191+
let Some(peer_id) = self.use_rand_available_peer() else {
202192
if awaiting_parent {
203193
// Allow lookups awaiting a parent to have zero peers. If, when the parent
204194
// resolves, they still have zero peers, the lookup will fail gracefully.
@@ -208,6 +198,7 @@ impl<T: BeaconChainTypes> SingleBlockLookup<T> {
208198
}
209199
};
210200

201+
let request = R::request_state_mut(self);
211202
match request.make_request(id, peer_id, downloaded_block_expected_blobs, cx)? {
212203
LookupRequestResult::RequestSent(req_id) => {
213204
request.get_state_mut().on_download_start(req_id)?
@@ -238,9 +229,7 @@ impl<T: BeaconChainTypes> SingleBlockLookup<T> {
238229
/// Add peer to this lookup's peer set. The peer must be able to serve this request.
239230
/// Returns true if the peer was newly inserted into the peer set.
240231
pub fn add_peer(&mut self, peer_id: PeerId) -> bool {
241-
let inserted_block = self.block_request_state.state.add_peer(&peer_id);
242-
let inserted_blob = self.blob_request_state.state.add_peer(&peer_id);
243-
inserted_block || inserted_blob
232+
self.peers.insert(peer_id)
244233
}
245234

246235
/// Returns true if the block has already been downloaded.
@@ -252,8 +241,17 @@ impl<T: BeaconChainTypes> SingleBlockLookup<T> {
252241
/// Remove peer from this lookup's peer set. Returns true if the peer was present in the set
253242
/// before removal.
254243
pub fn remove_peer(&mut self, peer_id: &PeerId) -> bool {
255-
self.block_request_state.state.remove_peer(peer_id)
256-
&& self.blob_request_state.state.remove_peer(peer_id)
244+
self.peers.remove(peer_id)
245+
}
246+
247+
/// Returns true if this lookup has zero peers
248+
pub fn has_no_peers(&self) -> bool {
249+
self.peers.is_empty()
250+
}
251+
252+
/// Selects a random peer from available peers if any
253+
fn use_rand_available_peer(&mut self) -> Option<PeerId> {
254+
self.peers.iter().choose(&mut rand::thread_rng()).copied()
257255
}
258256
}
259257

@@ -267,10 +265,10 @@ pub struct BlobRequestState<E: EthSpec> {
267265
}
268266

269267
impl<E: EthSpec> BlobRequestState<E> {
270-
pub fn new(block_root: Hash256, peer_source: &[PeerId]) -> Self {
268+
pub fn new(block_root: Hash256) -> Self {
271269
Self {
272270
block_root,
273-
state: SingleLookupRequestState::new(peer_source),
271+
state: SingleLookupRequestState::new(),
274272
}
275273
}
276274
}
@@ -285,10 +283,10 @@ pub struct BlockRequestState<E: EthSpec> {
285283
}
286284

287285
impl<E: EthSpec> BlockRequestState<E> {
288-
pub fn new(block_root: Hash256, peers: &[PeerId]) -> Self {
286+
pub fn new(block_root: Hash256) -> Self {
289287
Self {
290288
requested_block_root: block_root,
291-
state: SingleLookupRequestState::new(peers),
289+
state: SingleLookupRequestState::new(),
292290
}
293291
}
294292
}
@@ -318,29 +316,16 @@ pub enum State<T: Clone> {
318316
pub struct SingleLookupRequestState<T: Clone> {
319317
/// State of this request.
320318
state: State<T>,
321-
/// Peers that should have this block or blob.
322-
#[derivative(Debug(format_with = "fmt_peer_set"))]
323-
available_peers: HashSet<PeerId>,
324-
/// Peers from which we have requested this block.
325-
#[derivative(Debug = "ignore")]
326-
used_peers: HashSet<PeerId>,
327319
/// How many times have we attempted to process this block or blob.
328320
failed_processing: u8,
329321
/// How many times have we attempted to download this block or blob.
330322
failed_downloading: u8,
331323
}
332324

333325
impl<T: Clone> SingleLookupRequestState<T> {
334-
pub fn new(peers: &[PeerId]) -> Self {
335-
let mut available_peers = HashSet::default();
336-
for peer in peers.iter().copied() {
337-
available_peers.insert(peer);
338-
}
339-
326+
pub fn new() -> Self {
340327
Self {
341328
state: State::AwaitingDownload,
342-
available_peers,
343-
used_peers: HashSet::default(),
344329
failed_processing: 0,
345330
failed_downloading: 0,
346331
}
@@ -518,38 +503,6 @@ impl<T: Clone> SingleLookupRequestState<T> {
518503
pub fn more_failed_processing_attempts(&self) -> bool {
519504
self.failed_processing >= self.failed_downloading
520505
}
521-
522-
/// Add peer to this request states. The peer must be able to serve this request.
523-
/// Returns true if the peer is newly inserted.
524-
pub fn add_peer(&mut self, peer_id: &PeerId) -> bool {
525-
self.available_peers.insert(*peer_id)
526-
}
527-
528-
/// Remove peer from available peers. Return true if there are no more available peers and the
529-
/// request is not expecting any future event (AwaitingDownload).
530-
pub fn remove_peer(&mut self, disconnected_peer_id: &PeerId) -> bool {
531-
self.available_peers.remove(disconnected_peer_id);
532-
self.available_peers.is_empty() && self.is_awaiting_download()
533-
}
534-
535-
pub fn get_used_peers(&self) -> impl Iterator<Item = &PeerId> {
536-
self.used_peers.iter()
537-
}
538-
539-
pub fn get_available_peers(&self) -> impl Iterator<Item = &PeerId> {
540-
self.available_peers.iter()
541-
}
542-
543-
/// Selects a random peer from available peers if any, inserts it in used peers and returns it.
544-
pub fn use_rand_available_peer(&mut self) -> Option<PeerId> {
545-
let peer_id = self
546-
.available_peers
547-
.iter()
548-
.choose(&mut rand::thread_rng())
549-
.copied()?;
550-
self.used_peers.insert(peer_id);
551-
Some(peer_id)
552-
}
553506
}
554507

555508
// Display is used in the BadState assertions above
@@ -573,7 +526,7 @@ impl<T: Clone> std::fmt::Debug for State<T> {
573526
}
574527
}
575528

576-
fn fmt_peer_set(
529+
fn fmt_peer_set_as_len(
577530
peer_set: &HashSet<PeerId>,
578531
f: &mut std::fmt::Formatter,
579532
) -> Result<(), std::fmt::Error> {

beacon_node/network/src/sync/block_lookups/tests.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -526,8 +526,10 @@ impl TestRig {
526526

527527
fn peer_disconnected(&mut self, disconnected_peer_id: PeerId) {
528528
self.send_sync_message(SyncMessage::Disconnect(disconnected_peer_id));
529+
}
529530

530-
// Return RPCErrors for all active requests of peer
531+
/// Return RPCErrors for all active requests of peer
532+
fn rpc_error_all_active_requests(&mut self, disconnected_peer_id: PeerId) {
531533
self.drain_network_rx();
532534
while let Ok(request_id) = self.pop_received_network_event(|ev| match ev {
533535
NetworkMessage::SendRequest {
@@ -1265,27 +1267,41 @@ fn test_parent_lookup_too_deep() {
12651267
}
12661268

12671269
#[test]
1268-
fn test_parent_lookup_disconnection_no_peers_left() {
1270+
fn test_lookup_peer_disconnected_no_peers_left_while_request() {
12691271
let mut rig = TestRig::test_setup();
12701272
let peer_id = rig.new_connected_peer();
12711273
let trigger_block = rig.rand_block();
12721274
rig.trigger_unknown_parent_block(peer_id, trigger_block.into());
1275+
rig.peer_disconnected(peer_id);
1276+
rig.rpc_error_all_active_requests(peer_id);
1277+
rig.expect_no_active_lookups();
1278+
}
12731279

1280+
#[test]
1281+
fn test_lookup_peer_disconnected_no_peers_left_not_while_request() {
1282+
let mut rig = TestRig::test_setup();
1283+
let peer_id = rig.new_connected_peer();
1284+
let trigger_block = rig.rand_block();
1285+
rig.trigger_unknown_parent_block(peer_id, trigger_block.into());
12741286
rig.peer_disconnected(peer_id);
1287+
// Note: this test case may be removed in the future. It's not strictly necessary to drop a
1288+
// lookup if there are no peers left. A lookup should only be dropped if it cannot make progress.
12751289
rig.expect_no_active_lookups();
12761290
}
12771291

12781292
#[test]
12791293
fn test_lookup_disconnection_peer_left() {
12801294
let mut rig = TestRig::test_setup();
12811295
let peer_ids = (0..2).map(|_| rig.new_connected_peer()).collect::<Vec<_>>();
1296+
let disconnecting_peer = *peer_ids.first().unwrap();
12821297
let block_root = Hash256::random();
12831298
// lookup should have two peers associated with the same block
12841299
for peer_id in peer_ids.iter() {
12851300
rig.trigger_unknown_block_from_attestation(block_root, *peer_id);
12861301
}
12871302
// Disconnect the first peer only, which is the one handling the request
1288-
rig.peer_disconnected(*peer_ids.first().unwrap());
1303+
rig.peer_disconnected(disconnecting_peer);
1304+
rig.rpc_error_all_active_requests(disconnecting_peer);
12891305
rig.assert_single_lookups_count(1);
12901306
}
12911307

0 commit comments

Comments
 (0)