Skip to content

Commit e22a0ef

Browse files
alexgghgithub-actions[bot]
authored andcommitted
Fix Possible bug: Vote import failed after aggression is enabled (#6690)
After finality started lagging on kusama around 025-11-25 15:55:40 validators started seeing ocassionally this log, when importing votes covering more than one assignment. ``` Possible bug: Vote import failed ``` That happens because the assumption that assignments from the same validator would have the same required routing doesn't hold after you enabled aggression, so you might end up receiving the first assignment then you modify the routing for it in `enable_aggression` then your receive the second assignment and the vote covering both assignments, so the rouing for the first and second assingment wouldn't match and we would fail to import the vote. From the logs I've seen, I don't think this is the reason the network didn't fully recover until the failsafe kicked it, because the votes had been already imported in approval-voting before this error. --------- Signed-off-by: Alexandru Gheorghe <alexandru.gheorghe@parity.io> (cherry picked from commit da95345)
1 parent ed1932e commit e22a0ef

File tree

3 files changed

+83
-13
lines changed

3 files changed

+83
-13
lines changed

polkadot/node/network/approval-distribution/src/lib.rs

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,6 @@ enum ApprovalEntryError {
148148
InvalidCandidateIndex,
149149
DuplicateApproval,
150150
UnknownAssignment,
151-
#[allow(dead_code)]
152-
AssignmentsFollowedDifferentPaths(RequiredRouting, RequiredRouting),
153151
}
154152

155153
impl ApprovalEntry {
@@ -547,7 +545,7 @@ impl BlockEntry {
547545
&mut self,
548546
approval: IndirectSignedApprovalVoteV2,
549547
) -> Result<(RequiredRouting, HashSet<PeerId>), ApprovalEntryError> {
550-
let mut required_routing = None;
548+
let mut required_routing: Option<RequiredRouting> = None;
551549
let mut peers_randomly_routed_to = HashSet::new();
552550

553551
if self.candidates.len() < approval.candidate_indices.len() as usize {
@@ -574,16 +572,11 @@ impl BlockEntry {
574572
peers_randomly_routed_to
575573
.extend(approval_entry.routing_info().peers_randomly_routed.iter());
576574

577-
if let Some(required_routing) = required_routing {
578-
if required_routing != approval_entry.routing_info().required_routing {
579-
// This shouldn't happen since the required routing is computed based on the
580-
// validator_index, so two assignments from the same validators will have
581-
// the same required routing.
582-
return Err(ApprovalEntryError::AssignmentsFollowedDifferentPaths(
583-
required_routing,
584-
approval_entry.routing_info().required_routing,
585-
))
586-
}
575+
if let Some(current_required_routing) = required_routing {
576+
required_routing = Some(
577+
current_required_routing
578+
.combine(approval_entry.routing_info().required_routing),
579+
);
587580
} else {
588581
required_routing = Some(approval_entry.routing_info().required_routing)
589582
}

polkadot/node/network/protocol/src/grid_topology.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,22 @@ impl RequiredRouting {
575575
_ => false,
576576
}
577577
}
578+
579+
/// Combine two required routing sets into one that would cover both routing modes.
580+
pub fn combine(self, other: Self) -> Self {
581+
match (self, other) {
582+
(RequiredRouting::All, _) | (_, RequiredRouting::All) => RequiredRouting::All,
583+
(RequiredRouting::GridXY, _) | (_, RequiredRouting::GridXY) => RequiredRouting::GridXY,
584+
(RequiredRouting::GridX, RequiredRouting::GridY) |
585+
(RequiredRouting::GridY, RequiredRouting::GridX) => RequiredRouting::GridXY,
586+
(RequiredRouting::GridX, RequiredRouting::GridX) => RequiredRouting::GridX,
587+
(RequiredRouting::GridY, RequiredRouting::GridY) => RequiredRouting::GridY,
588+
(RequiredRouting::None, RequiredRouting::PendingTopology) |
589+
(RequiredRouting::PendingTopology, RequiredRouting::None) => RequiredRouting::PendingTopology,
590+
(RequiredRouting::None, _) | (RequiredRouting::PendingTopology, _) => other,
591+
(_, RequiredRouting::None) | (_, RequiredRouting::PendingTopology) => self,
592+
}
593+
}
578594
}
579595

580596
#[cfg(test)]
@@ -587,6 +603,50 @@ mod tests {
587603
rand_chacha::ChaCha12Rng::seed_from_u64(12345)
588604
}
589605

606+
#[test]
607+
fn test_required_routing_combine() {
608+
assert_eq!(RequiredRouting::All.combine(RequiredRouting::None), RequiredRouting::All);
609+
assert_eq!(RequiredRouting::All.combine(RequiredRouting::GridXY), RequiredRouting::All);
610+
assert_eq!(RequiredRouting::GridXY.combine(RequiredRouting::All), RequiredRouting::All);
611+
assert_eq!(RequiredRouting::None.combine(RequiredRouting::All), RequiredRouting::All);
612+
assert_eq!(RequiredRouting::None.combine(RequiredRouting::None), RequiredRouting::None);
613+
assert_eq!(
614+
RequiredRouting::PendingTopology.combine(RequiredRouting::GridX),
615+
RequiredRouting::GridX
616+
);
617+
618+
assert_eq!(
619+
RequiredRouting::GridX.combine(RequiredRouting::PendingTopology),
620+
RequiredRouting::GridX
621+
);
622+
assert_eq!(RequiredRouting::GridX.combine(RequiredRouting::GridY), RequiredRouting::GridXY);
623+
assert_eq!(RequiredRouting::GridY.combine(RequiredRouting::GridX), RequiredRouting::GridXY);
624+
assert_eq!(
625+
RequiredRouting::GridXY.combine(RequiredRouting::GridXY),
626+
RequiredRouting::GridXY
627+
);
628+
assert_eq!(RequiredRouting::GridX.combine(RequiredRouting::GridX), RequiredRouting::GridX);
629+
assert_eq!(RequiredRouting::GridY.combine(RequiredRouting::GridY), RequiredRouting::GridY);
630+
631+
assert_eq!(RequiredRouting::None.combine(RequiredRouting::GridY), RequiredRouting::GridY);
632+
assert_eq!(RequiredRouting::None.combine(RequiredRouting::GridX), RequiredRouting::GridX);
633+
assert_eq!(RequiredRouting::None.combine(RequiredRouting::GridXY), RequiredRouting::GridXY);
634+
635+
assert_eq!(RequiredRouting::GridY.combine(RequiredRouting::None), RequiredRouting::GridY);
636+
assert_eq!(RequiredRouting::GridX.combine(RequiredRouting::None), RequiredRouting::GridX);
637+
assert_eq!(RequiredRouting::GridXY.combine(RequiredRouting::None), RequiredRouting::GridXY);
638+
639+
assert_eq!(
640+
RequiredRouting::PendingTopology.combine(RequiredRouting::None),
641+
RequiredRouting::PendingTopology
642+
);
643+
644+
assert_eq!(
645+
RequiredRouting::None.combine(RequiredRouting::PendingTopology),
646+
RequiredRouting::PendingTopology
647+
);
648+
}
649+
590650
#[test]
591651
fn test_random_routing_sample() {
592652
// This test is fragile as it relies on a specific ChaCha12Rng

prdoc/pr_6690.prdoc

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Schema: Polkadot SDK PRDoc Schema (prdoc) v1.0.0
2+
# See doc at https://raw.githubusercontent.com/paritytech/polkadot-sdk/master/prdoc/schema_user.json
3+
4+
title: Fix Possible bug, Vote import failed after aggression is enabled
5+
6+
doc:
7+
- audience: Node Dev
8+
description: |
9+
Fix the appearance of Possible bug: Vote import failed after aggression is enabled, the log itself is
10+
harmless because approval gets imported anyway and aggression is able to distribute it, nevertheless
11+
is something that can be easily be fixed by picking the highest required routing possible.
12+
13+
crates:
14+
- name: polkadot-node-network-protocol
15+
bump: minor
16+
- name: polkadot-approval-distribution
17+
bump: minor

0 commit comments

Comments
 (0)