Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions core/src/block_creation_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -657,15 +657,30 @@ fn create_and_insert_leader_bank(slot: Slot, parent_bank: Arc<Bank>, ctx: &mut L
ctx.poh_recorder.write().unwrap().set_bank(tpu_bank);

// If this is the very first alpenglow block, include the genesis certificate
if parent_slot == ctx.genesis_cert.slot {
// Note: if the alpenglow genesis is 0, then this is a test cluster with Alpenglow enabled
// by default. No need to put in the genesis marker as the genesis account is already populated
// during cluster creation.
if parent_slot == ctx.genesis_cert.slot && parent_slot != 0 {
let genesis_marker = VersionedBlockMarker::Current(BlockMarkerV1::GenesisCertificate(
ctx.genesis_cert.clone(),
));
ctx.poh_recorder
.write()
.unwrap()

let mut poh_recorder = ctx.poh_recorder.write().unwrap();
// Send the genesis certificate
poh_recorder
.send_marker(genesis_marker)
.expect("Max tick height cannot have been reached");

// Process the genesis certificate
let bank = poh_recorder.bank().expect("Bank cannot have been cleared");
let processor = bank.block_component_processor.read().unwrap();
processor
.on_genesis_certificate(
bank.clone(),
ctx.genesis_cert.clone(),
&ctx.bank_forks.read().unwrap().migration_status(),
)
.expect("Recording genesis certificate should not fail");
}

// Wakeup banking stage
Expand Down
1 change: 0 additions & 1 deletion core/src/replay_stage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -922,7 +922,6 @@ impl ReplayStage {

let forks_root = bank_forks.read().unwrap().root();
let start_leader_time = if !migration_status.is_alpenglow_enabled() {
debug_assert!(votor_event_receiver.is_empty());
// Process cluster-agreed versions of duplicate slots for which we potentially
// have the wrong version. Our version was dead or pruned.
// Signalled by ancestor_hashes_service.
Expand Down
1 change: 1 addition & 0 deletions core/src/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1593,6 +1593,7 @@ impl Validator {
};
(tower, VoteHistory::new(identity_keypair.pubkey(), 0))
};
migration_status.log_phase();

let last_vote = tower.last_vote();

Expand Down
98 changes: 94 additions & 4 deletions ledger/src/blockstore_processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1579,7 +1579,7 @@ pub fn confirm_slot(
.on_marker(
bank.clone_without_scheduler(),
parent_bank,
&marker,
marker,
migration_status,
is_final,
)
Expand Down Expand Up @@ -1856,6 +1856,54 @@ fn process_bank_0(
Ok(())
}

/// Clean up a failed slot and restart processing from the given genesis slot.
///
/// Startup-replay recovery path: `first_alpenglow_bank` was replayed as a
/// TowerBFT bank and failed (it is actually an Alpenglow block). This undoes
/// the partially-applied state for that slot, then clears and re-seeds
/// `pending_slots` from `genesis_slot` so it and its descendants are
/// replayed again — this time as Alpenglow banks.
///
/// # Errors
///
/// Returns `BlockstoreProcessorError::FailedToLoadMeta` if the blockstore
/// cannot load the `SlotMeta` for `genesis_slot`; propagates any error from
/// `process_next_slots`.
fn cleanup_and_populate_pending_from_alpenglow_genesis(
    // Bank that failed replay under TowerBFT rules; its slot is reset below.
    first_alpenglow_bank: &BankWithScheduler,
    // Slot of the Alpenglow genesis block to restart processing from.
    genesis_slot: Slot,
    bank_forks: &RwLock<BankForks>,
    blockstore: &Blockstore,
    leader_schedule_cache: &LeaderScheduleCache,
    // Replay work queue; cleared and repopulated from `genesis_slot` here.
    pending_slots: &mut Vec<(SlotMeta, Bank, Hash)>,
    opts: &ProcessOptions,
    migration_status: &MigrationStatus,
) -> result::Result<(), BlockstoreProcessorError> {
    // `first_alpenglow_bank` was processed as a TowerBFT bank. Reset it:
    // remove the unrooted slot, clear its status-cache signatures, and prune
    // programs deployed in that slot, so the slot can be replayed fresh.
    let root_bank = bank_forks.read().unwrap().root_bank();
    root_bank
        .remove_unrooted_slots(&[(first_alpenglow_bank.slot(), first_alpenglow_bank.bank_id())]);
    root_bank.clear_slot_signatures(first_alpenglow_bank.slot());
    root_bank.prune_program_cache_by_deployment_slot(first_alpenglow_bank.slot());
    // The failed replay marked the slot dead in the blockstore; clear that
    // flag so the slot is eligible for replay again.
    // NOTE(review): unwrap assumes this blockstore write cannot fail on the
    // startup path — confirm this matches the file's other startup calls.
    blockstore
        .remove_dead_slot(first_alpenglow_bank.slot())
        .unwrap();

    // Load the genesis slot's meta so its children can be re-enqueued. The
    // trailing `.unwrap()` assumes the meta entry exists — presumably
    // guaranteed because the genesis bank is already in `bank_forks` (see
    // the `get(genesis_slot).unwrap()` below).
    let genesis_slot_meta = blockstore
        .meta(genesis_slot)
        .map_err(|err| {
            warn!("Failed to load meta for slot {genesis_slot}: {err:?}");
            BlockstoreProcessorError::FailedToLoadMeta
        })?
        .unwrap();

    warn!(
        "{}: load_frozen_forks() restart processing from {genesis_slot} treating further blocks \
         as Alpenglow banks",
        migration_status.my_pubkey()
    );
    // Discard all queued slots: they may contain banks initialized under
    // TowerBFT assumptions. Re-enqueue the genesis slot's children instead.
    pending_slots.clear();
    process_next_slots(
        &bank_forks.read().unwrap().get(genesis_slot).unwrap(),
        &genesis_slot_meta,
        blockstore,
        leader_schedule_cache,
        pending_slots,
        opts,
        migration_status,
    )?;

    Ok(())
}

// Given a bank, add its children to the pending slots queue if those children slots are
// complete
fn process_next_slots(
Expand Down Expand Up @@ -1959,8 +2007,8 @@ fn load_frozen_forks(
let mut root = bank_forks.read().unwrap().root();
let max_root = std::cmp::max(root, blockstore_max_root);
info!(
"load_frozen_forks() latest root from blockstore: {blockstore_max_root}, max_root: \
{max_root}",
"load_frozen_forks() bank forks root {root}, latest root from blockstore: \
{blockstore_max_root}, max_root: {max_root}",
);

// The total number of slots processed
Expand Down Expand Up @@ -2043,6 +2091,31 @@ fn load_frozen_forks(
if opts.abort_on_invalid_block {
Err(error)?
}

// If this block was the first alpenglow block and advanced the migration phase, we can enable alpenglow.
//
// Note: since this code is all startup code we don't have to worry about shutting down `PohService` or any
// in flight activity of `ReplayStage`. This bank must have failed to freeze as it is an Alpenglow block
// being verified as a TowerBFT one.
//
// We are safe to cleanly transition to alpenglow here
if migration_status.is_ready_to_enable() {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is basically analogous to when we enable alpenglow during steady state in replay.
Difference here is:

  • We reach ReadyToEnable by trying to process the first alpenglow block as a TowerBFT block and failing. While processing we observed the GenesisCertificate marker so we know that the migration happened, and which block is the genesis.
  • We don't have to purge the blocks > genesis; instead we just reprocess them as Alpenglow blocks (ticks adjusted and markers allowed).
  • We have to reset the dead status and retry this first alpenglow block since we just failed to process it as a TowerBFT block

let genesis_slot = migration_status.enable_alpenglow_during_startup();

// We need to clear pending_slots as it might contain Alpenglow blocks initialized as TowerBFT banks.
// Clear and populate pending slots from alpenglow genesis
cleanup_and_populate_pending_from_alpenglow_genesis(
&bank,
genesis_slot,
bank_forks,
blockstore,
leader_schedule_cache,
&mut pending_slots,
opts,
&migration_status,
)?;
}

continue;
}
txs += progress.num_txs;
Expand Down Expand Up @@ -2105,7 +2178,11 @@ fn load_frozen_forks(
} else {
None
}
};
}.filter(|new_root_bank| {
// In the case that we've restarted while the migration period is in progress but before alpenglow
// is enabled, don't root blocks past the migration slot
migration_status.should_root_during_startup(new_root_bank.slot())
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

startup equivalent of

// We do not root during the migration - post genesis rooting is handled by votor
migration_status.should_report_commitment_or_root(*root)
});

});
m.stop();
voting_us += m.as_us();

Expand All @@ -2129,6 +2206,19 @@ fn load_frozen_forks(
all_banks.retain(|_, bank| bank.ancestors.contains_key(&root));
m.stop();
root_retain_us += m.as_us();

// If this root bank activated the feature flag, update migration status
if migration_status.is_pre_feature_activation() {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

startup equivalent of

// Check if we've rooted a bank that will tell us the migration slot
if migration_status.is_pre_feature_activation() {

if let Some(slot) = bank_forks
.read()
.unwrap()
.root_bank()
.feature_set
.activated_slot(&agave_feature_set::alpenglow::id())
{
migration_status.record_feature_activation(slot);
}
}
}

slots_processed += 1;
Expand Down
2 changes: 1 addition & 1 deletion local-cluster/src/integration_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ pub const AG_DEBUG_LOG_FILTER: &str =
solana_core::block_creation_loop=trace,solana_votor=trace,solana_votor::voting_service=info,\
solana_votor::vote_history_storage=info,solana_core::validator=info,\
solana_votor::consensus_metrics=info,solana_core::consensus=info,\
solana_ledger::blockstore_processor=info";
solana_ledger::blockstore_processor=info,solana_ledger::blockstore=info";
pub const DEFAULT_NODE_STAKE: u64 = 10 * LAMPORTS_PER_SOL;

pub fn last_vote_in_tower(tower_path: &Path, node_pubkey: &Pubkey) -> Option<(Slot, Hash)> {
Expand Down
Loading