Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ lru = { workspace = true }
min-max-heap = { workspace = true }
num_cpus = { workspace = true }
num_enum = { workspace = true }
parking_lot = { workspace = true }
prio-graph = { workspace = true }
qualifier_attr = { workspace = true }
quinn = { workspace = true }
Expand Down
35 changes: 31 additions & 4 deletions core/src/block_creation_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use {
banking_trace::BankingTracer,
replay_stage::{Finalizer, ReplayStage},
},
parking_lot::RwLock as PLRwLock,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this over std::sync::RwLock ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A couple of reasons:

  • My understanding is that parking lot is fairer to writers and ensures that writers do not starve. In particular, if there is a writer waiting, then additional readers also have to wait.
  • For small critical sections, benchmarks seem to suggest that parking lot does better.

solana_clock::Slot,
solana_entry::block_component::{
BlockFooterV1, BlockMarkerV1, VersionedBlockFooter, VersionedBlockMarker,
Expand All @@ -31,8 +32,14 @@ use {
bank_forks::BankForks,
},
solana_version::version,
solana_votor::{common::block_timeout, event::LeaderWindowInfo, votor::LeaderWindowNotifier},
solana_votor_messages::migration::MigrationStatus,
solana_votor::{
common::block_timeout, consensus_rewards::ConsensusRewards, event::LeaderWindowInfo,
votor::LeaderWindowNotifier,
},
solana_votor_messages::{
migration::MigrationStatus,
rewards_certificate::{NotarRewardCertificate, SkipRewardCertificate},
},
stats::{BlockCreationLoopMetrics, SlotMetrics},
std::{
sync::{
Expand Down Expand Up @@ -79,6 +86,7 @@ pub struct BlockCreationLoopConfig {
pub poh_recorder: Arc<RwLock<PohRecorder>>,
pub leader_schedule_cache: Arc<LeaderScheduleCache>,
pub rpc_subscriptions: Option<Arc<RpcSubscriptions>>,
pub consensus_rewards: Arc<PLRwLock<ConsensusRewards>>,

// Notifiers
pub banking_tracer: Arc<BankingTracer>,
Expand All @@ -104,6 +112,7 @@ struct LeaderContext {
slot_status_notifier: Option<SlotStatusNotifier>,
banking_tracer: Arc<BankingTracer>,
replay_highest_frozen: Arc<ReplayHighestFrozen>,
consensus_rewards: Arc<PLRwLock<ConsensusRewards>>,

// Metrics
metrics: BlockCreationLoopMetrics,
Expand Down Expand Up @@ -134,9 +143,15 @@ enum StartLeaderError {
),
}

fn produce_block_footer(block_producer_time_nanos: u64) -> VersionedBlockMarker {
fn produce_block_footer(
block_producer_time_nanos: u64,
skip_reward_certificate: Option<SkipRewardCertificate>,
notar_reward_certificate: Option<NotarRewardCertificate>,
) -> VersionedBlockMarker {
let footer = BlockFooterV1 {
block_producer_time_nanos,
skip_reward_certificate,
notar_reward_certificate,
block_user_agent: format!("agave/{}", version!()).into_bytes(),
};

Expand Down Expand Up @@ -166,6 +181,7 @@ fn start_loop(config: BlockCreationLoopConfig) {
leader_window_notifier,
replay_highest_frozen,
migration_status,
consensus_rewards,
} = config;

// Similar to Votor, if this loop dies kill the validator
Expand All @@ -189,6 +205,7 @@ fn start_loop(config: BlockCreationLoopConfig) {
replay_highest_frozen,
metrics: BlockCreationLoopMetrics::default(),
slot_metrics: SlotMetrics::default(),
consensus_rewards,
};

info!("{my_pubkey}: Block creation loop initialized");
Expand Down Expand Up @@ -301,6 +318,7 @@ fn produce_window(
if let Err(e) = record_and_complete_block(
ctx.poh_recorder.as_ref(),
&mut ctx.record_receiver,
ctx.consensus_rewards.clone(),
skip_timer,
timeout,
) {
Expand Down Expand Up @@ -342,9 +360,17 @@ fn produce_window(
fn record_and_complete_block(
poh_recorder: &RwLock<PohRecorder>,
record_receiver: &mut RecordReceiver,
consensus_rewards: Arc<PLRwLock<ConsensusRewards>>,
block_timer: Instant,
block_timeout: Duration,
) -> Result<(), PohRecorderError> {
// Taking a read lock on consensus_rewards can contend with the write lock in bls_sigverifier.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Feedback from @AshwinSekar: #607 (comment)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AshwinSekar : I've reworked the PR to use channels and a dedicated thread now. There is also a "stateless" version of wants_vote that we could call from the BLS sigverifier to do some filtering and avoid cloning all votes. I'll add support for that as a separate commit.

// We are ready to produce the block footer now, while we gather other bits of data, we can block on the consensus_rewards lock in a separate thread to minimise contention.
let handle = std::thread::spawn(move || {
// XXX: how to look up the slot.
let slot = u64::MAX;
consensus_rewards.read().build_rewards_certs(slot)
});
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know this isn't final, but spawning unnamed threads like this is not how we typically like to do stuff in parallel

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

absolutely, this was just to show what I propose to do. Is there an example that I can follow on how I could do something like this?

loop {
let remaining_slot_time = block_timeout.saturating_sub(block_timer.elapsed());
if remaining_slot_time.is_zero() {
Expand Down Expand Up @@ -378,7 +404,8 @@ fn record_and_complete_block(
// Construct and send the block footer
let mut w_poh_recorder = poh_recorder.write().unwrap();
let block_producer_time_nanos = w_poh_recorder.working_bank_block_producer_time_nanos();
let footer = produce_block_footer(block_producer_time_nanos);
let (skip, notar) = handle.join().unwrap();
let footer = produce_block_footer(block_producer_time_nanos, skip, notar);
w_poh_recorder.send_marker(footer)?;

// Alpentick and clear bank
Expand Down
58 changes: 51 additions & 7 deletions core/src/bls_sigverify/bls_sigverifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use {
},
bitvec::prelude::{BitVec, Lsb0},
crossbeam_channel::{Sender, TrySendError},
parking_lot::RwLock as PLRwLock,
rayon::iter::{
IndexedParallelIterator, IntoParallelIterator, IntoParallelRefIterator, ParallelIterator,
},
Expand All @@ -23,7 +24,10 @@ use {
solana_runtime::{bank::Bank, bank_forks::SharableBanks, epoch_stakes::BLSPubkeyToRankMap},
solana_signer_store::{decode, DecodeError},
solana_streamer::packet::PacketBatch,
solana_votor::consensus_metrics::{ConsensusMetricsEvent, ConsensusMetricsEventSender},
solana_votor::{
consensus_metrics::{ConsensusMetricsEvent, ConsensusMetricsEventSender},
consensus_rewards::ConsensusRewards,
},
solana_votor_messages::{
consensus_message::{Certificate, CertificateType, ConsensusMessage, VoteMessage},
vote::Vote,
Expand Down Expand Up @@ -92,6 +96,7 @@ pub struct BLSSigVerifier {
consensus_metrics_sender: ConsensusMetricsEventSender,
last_checked_root_slot: Slot,
alpenglow_last_voted: Arc<AlpenglowLastVoted>,
consensus_rewards: Arc<PLRwLock<ConsensusRewards>>,
}

impl BLSSigVerifier {
Expand Down Expand Up @@ -172,8 +177,17 @@ impl BLSSigVerifier {
id: *solana_pubkey,
vote: vote_message.vote,
});
// Only need votes newer than root slot

// The consensus pool does not need votes for slots at or before the root slot; however, the rewards container may still need them.
if vote_message.vote.slot() <= root_bank.slot() {
// The only module that takes a write lock on consensus_rewards is this one, and it does not take the write lock while verifying votes, so this should never block.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

feedback from Brennan: maybe we can do this inline if we can demonstrate that we have enough compute resources to handle the additional verification. This will also require ensuring that the dedup can handle the DDoS vectors we are worried about.

if self
.consensus_rewards
.read()
.wants_vote(root_bank.slot(), &vote_message)
{
// XXX: actually verify and send the votes. The verification and sending should happen off the critical path.
}
self.stats.received_old.fetch_add(1, Ordering::Relaxed);
packet.meta_mut().set_discard(true);
continue;
Expand Down Expand Up @@ -219,8 +233,8 @@ impl BLSSigVerifier {
|| self.verify_and_send_certificates(certs_to_verify, &root_bank),
);

votes_result?;
certs_result?;
let rewards_votes = votes_result?;
let () = certs_result?;

// Send to RPC service for last voted tracking
self.alpenglow_last_voted
Expand All @@ -235,6 +249,17 @@ impl BLSSigVerifier {
warn!("could not send consensus metrics, receive side of channel is closed");
}

{
// This should be the only place that is taking a write lock on consensus_rewards.
// This lock should not contend with any other operations in this module.
// It can contend with the read lock in the block creation loop, though; as such, we should hold it for as little time as possible.
let mut guard = self.consensus_rewards.write();
let root_slot = root_bank.slot();
for v in rewards_votes {
guard.add_vote_message(root_slot, v);
}
}

self.stats.maybe_report_stats();

Ok(())
Expand All @@ -248,6 +273,7 @@ impl BLSSigVerifier {
message_sender: Sender<ConsensusMessage>,
consensus_metrics_sender: ConsensusMetricsEventSender,
alpenglow_last_voted: Arc<AlpenglowLastVoted>,
consensus_rewards: Arc<PLRwLock<ConsensusRewards>>,
) -> Self {
Self {
sharable_banks,
Expand All @@ -259,14 +285,32 @@ impl BLSSigVerifier {
consensus_metrics_sender,
last_checked_root_slot: 0,
alpenglow_last_voted,
consensus_rewards,
}
}

/// Verifies votes and sends verified votes to the consensus pool.
/// Also returns a copy of the verified votes that the rewards container is interested in, so that the caller can forward them to it.
fn verify_and_send_votes(
&self,
votes_to_verify: Vec<VoteToVerify>,
) -> Result<(), BLSSigVerifyServiceError<ConsensusMessage>> {
) -> Result<Vec<VoteMessage>, BLSSigVerifyServiceError<ConsensusMessage>> {
let verified_votes = self.verify_votes(votes_to_verify);

let rewards_votes = {
// The only module that takes a write lock on consensus_rewards is this one, and it does not take the write lock while verifying votes, so this should never block.
let guard = self.consensus_rewards.read();
let root_slot = self.sharable_banks.root().slot();
verified_votes
.iter()
.filter_map(|vote| {
guard
.wants_vote(root_slot, &vote.vote_message)
.then_some(vote.vote_message.clone())
})
.collect()
};

self.stats
.total_valid_packets
.fetch_add(verified_votes.len() as u64, Ordering::Relaxed);
Expand All @@ -285,7 +329,7 @@ impl BLSSigVerifier {
}
}

// Send the BLS vote messaage to certificate pool
// Send the votes to the consensus pool
match self
.message_sender
.try_send(ConsensusMessage::Vote(vote.vote_message))
Expand Down Expand Up @@ -319,7 +363,7 @@ impl BLSSigVerifier {
}
}

Ok(())
Ok(rewards_votes)
}

fn verify_votes(&self, votes_to_verify: Vec<VoteToVerify>) -> Vec<VoteToVerify> {
Expand Down
4 changes: 4 additions & 0 deletions core/src/tvu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use {
},
bytes::Bytes,
crossbeam_channel::{bounded, unbounded, Receiver, Sender},
parking_lot::RwLock as PLRwLock,
solana_client::connection_cache::ConnectionCache,
solana_clock::Slot,
solana_geyser_plugin_manager::block_metadata_notifier_interface::BlockMetadataNotifierArc,
Expand Down Expand Up @@ -60,6 +61,7 @@ use {
},
solana_turbine::{retransmit_stage::RetransmitStage, xdp::XdpSender},
solana_votor::{
consensus_rewards::ConsensusRewards,
event::{VotorEventReceiver, VotorEventSender},
vote_history::VoteHistory,
vote_history_storage::VoteHistoryStorage,
Expand Down Expand Up @@ -214,6 +216,7 @@ impl Tvu {
key_notifiers: Arc<RwLock<KeyUpdaters>>,
alpenglow_last_voted: Arc<AlpenglowLastVoted>,
migration_status: Arc<MigrationStatus>,
consensus_rewards: Arc<PLRwLock<ConsensusRewards>>,
) -> Result<Self, String> {
let (consensus_message_sender, consensus_message_receiver) =
bounded(MAX_ALPENGLOW_PACKET_NUM);
Expand Down Expand Up @@ -274,6 +277,7 @@ impl Tvu {
consensus_message_sender.clone(),
consensus_metrics_sender.clone(),
alpenglow_last_voted.clone(),
consensus_rewards,
);
BLSSigverifyService::new(bls_packet_receiver, verifier)
};
Expand Down
9 changes: 9 additions & 0 deletions core/src/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ use {
},
anyhow::{anyhow, Context, Result},
crossbeam_channel::{bounded, unbounded, Receiver},
parking_lot::RwLock as PLRwLock,
quinn::Endpoint,
solana_accounts_db::{
accounts_db::{AccountsDbConfig, ACCOUNTS_DB_CONFIG_FOR_TESTING},
Expand Down Expand Up @@ -142,6 +143,7 @@ use {
solana_validator_exit::Exit,
solana_vote_program::vote_state,
solana_votor::{
consensus_rewards::ConsensusRewards,
vote_history::{VoteHistory, VoteHistoryError},
vote_history_storage::{NullVoteHistoryStorage, VoteHistoryStorage},
voting_service::VotingServiceOverride,
Expand Down Expand Up @@ -1425,6 +1427,11 @@ impl Validator {
migration_status.clone(),
);

let consensus_rewards = Arc::new(PLRwLock::new(ConsensusRewards::new(
cluster_info.clone(),
leader_schedule_cache.clone(),
)));

let block_creation_loop_config = BlockCreationLoopConfig {
exit: exit.clone(),
migration_status: migration_status.clone(),
Expand All @@ -1439,6 +1446,7 @@ impl Validator {
record_receiver: record_receiver.clone(),
leader_window_notifier: leader_window_notifier.clone(),
replay_highest_frozen: replay_highest_frozen.clone(),
consensus_rewards: consensus_rewards.clone(),
};
let block_creation_loop = BlockCreationLoop::new(block_creation_loop_config);

Expand Down Expand Up @@ -1693,6 +1701,7 @@ impl Validator {
key_notifiers.clone(),
alpenglow_last_voted.clone(),
migration_status.clone(),
consensus_rewards,
)
.map_err(ValidatorError::Other)?;

Expand Down
1 change: 1 addition & 0 deletions entry/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ solana-runtime-transaction = { workspace = true }
solana-sha256-hasher = { workspace = true }
solana-transaction = { workspace = true }
solana-transaction-error = { workspace = true }
solana-votor-messages = { workspace = true }
thiserror = { workspace = true }

[dev-dependencies]
Expand Down
Loading