Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions core/src/block_creation_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -657,15 +657,30 @@ fn create_and_insert_leader_bank(slot: Slot, parent_bank: Arc<Bank>, ctx: &mut L
ctx.poh_recorder.write().unwrap().set_bank(tpu_bank);

// If this is the very first alpenglow block, include the genesis certificate
if parent_slot == ctx.genesis_cert.slot {
// Note: if the alpenglow genesis is 0, then this is a test cluster with Alpenglow enabled
// by default. No need to put in the genesis marker as the genesis account is already populated
// during cluster creation.
if parent_slot == ctx.genesis_cert.slot && parent_slot != 0 {
let genesis_marker = VersionedBlockMarker::Current(BlockMarkerV1::GenesisCertificate(
ctx.genesis_cert.clone(),
));
ctx.poh_recorder
.write()
.unwrap()

let mut poh_recorder = ctx.poh_recorder.write().unwrap();
// Send the genesis certificate
poh_recorder
.send_marker(genesis_marker)
.expect("Max tick height cannot have been reached");

// Process the genesis certificate
let bank = poh_recorder.bank().expect("Bank cannot have been cleared");
let processor = bank.block_component_processor.read().unwrap();
processor
.on_genesis_certificate(
bank.clone(),
ctx.genesis_cert.clone(),
&ctx.bank_forks.read().unwrap().migration_status(),
)
.expect("Recording genesis certificate should not fail");
}

// Wakeup banking stage
Expand Down
1 change: 1 addition & 0 deletions core/src/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1593,6 +1593,7 @@ impl Validator {
};
(tower, VoteHistory::new(identity_keypair.pubkey(), 0))
};
migration_status.log_phase();

let last_vote = tower.last_vote();

Expand Down
98 changes: 94 additions & 4 deletions ledger/src/blockstore_processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1579,7 +1579,7 @@ pub fn confirm_slot(
.on_marker(
bank.clone_without_scheduler(),
parent_bank,
&marker,
marker,
migration_status,
is_final,
)
Expand Down Expand Up @@ -1856,6 +1856,54 @@ fn process_bank_0(
Ok(())
}

/// Clean up a failed slot and restart processing from the given genesis slot.
///
/// `first_alpenglow_bank` is the first Alpenglow block, which was replayed as a
/// TowerBFT bank and failed. This rolls back its partial state (unrooted slot
/// state, slot signatures, program-cache entries, dead-slot marking) and then
/// re-seeds `pending_slots` starting from `genesis_slot` so subsequent blocks
/// are replayed as Alpenglow banks instead.
///
/// Returns `FailedToLoadMeta` if the blockstore meta for `genesis_slot` cannot
/// be read.
fn cleanup_and_populate_pending_from_alpenglow_genesis(
first_alpenglow_bank: &BankWithScheduler,
genesis_slot: Slot,
bank_forks: &RwLock<BankForks>,
blockstore: &Blockstore,
leader_schedule_cache: &LeaderScheduleCache,
pending_slots: &mut Vec<(SlotMeta, Bank, Hash)>,
opts: &ProcessOptions,
migration_status: &MigrationStatus,
) -> result::Result<(), BlockstoreProcessorError> {
// `first_alpenglow_bank` was processed as a TowerBFT bank. Reset it.
let root_bank = bank_forks.read().unwrap().root_bank();
root_bank
.remove_unrooted_slots(&[(first_alpenglow_bank.slot(), first_alpenglow_bank.bank_id())]);
root_bank.clear_slot_signatures(first_alpenglow_bank.slot());
root_bank.prune_program_cache_by_deployment_slot(first_alpenglow_bank.slot());
// Clear the dead-slot mark so this slot can be retried as an Alpenglow block.
blockstore
.remove_dead_slot(first_alpenglow_bank.slot())
.unwrap();

let genesis_slot_meta = blockstore
.meta(genesis_slot)
.map_err(|err| {
warn!("Failed to load meta for slot {genesis_slot}: {err:?}");
BlockstoreProcessorError::FailedToLoadMeta
})?
.unwrap();

warn!(
"{}: load_frozen_forks() restart processing from {genesis_slot} treating further blocks \
as Alpenglow banks",
migration_status.my_pubkey()
);
// Drop any queued slots — the caller notes they might contain Alpenglow
// blocks initialized as TowerBFT banks — then re-seed the queue from the
// Alpenglow genesis slot.
pending_slots.clear();
process_next_slots(
&bank_forks.read().unwrap().get(genesis_slot).unwrap(),
&genesis_slot_meta,
blockstore,
leader_schedule_cache,
pending_slots,
opts,
migration_status,
)?;

Ok(())
}

// Given a bank, add its children to the pending slots queue if those children slots are
// complete
fn process_next_slots(
Expand Down Expand Up @@ -1959,8 +2007,8 @@ fn load_frozen_forks(
let mut root = bank_forks.read().unwrap().root();
let max_root = std::cmp::max(root, blockstore_max_root);
info!(
"load_frozen_forks() latest root from blockstore: {blockstore_max_root}, max_root: \
{max_root}",
"load_frozen_forks() bank forks root {root}, latest root from blockstore: \
{blockstore_max_root}, max_root: {max_root}",
);

// The total number of slots processed
Expand Down Expand Up @@ -2043,6 +2091,31 @@ fn load_frozen_forks(
if opts.abort_on_invalid_block {
Err(error)?
}

// If this block was the first alpenglow block and advanced the migration phase, we can enable alpenglow.
//
// Note: since this code is all startup code we don't have to worry about shutting down `PohService` or any
// in flight activity of `ReplayStage`. This bank must have failed to freeze as it is an Alpenglow block
// being verified as a TowerBFT one.
//
// We are safe to cleanly transition to alpenglow here
if migration_status.is_ready_to_enable() {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is basically analogous to when we enable alpenglow during steady state in replay.
Difference here is:

  • We reach ReadyToEnable by trying to process the first alpenglow block as a TowerBFT block and failing. While processing we observed the GenesisCertificate marker so we know that the migration happened, and which block is the genesis.
  • We don't have to purge the blocks > genesis, instead we just reprocess them as Alpenglow blocks (ticks adjusted and markers allowed).
  • We have to reset the dead status and retry this first alpenglow block since we just failed to process it as a TowerBFT block

let genesis_slot = migration_status.enable_alpenglow_during_startup();

// We need to clear pending_slots as it might contain Alpenglow blocks initialized as TowerBFT banks.
// Clear and populate pending slots from alpenglow genesis
cleanup_and_populate_pending_from_alpenglow_genesis(
&bank,
genesis_slot,
bank_forks,
blockstore,
leader_schedule_cache,
&mut pending_slots,
opts,
&migration_status,
)?;
}

continue;
}
txs += progress.num_txs;
Expand Down Expand Up @@ -2105,7 +2178,11 @@ fn load_frozen_forks(
} else {
None
}
};
}.filter(|new_root_bank| {
// In the case that we've restarted while the migration period is going on but before alpenglow
// is enabled, don't root blocks past the migration slot
migration_status.should_root_during_startup(new_root_bank.slot())
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

startup equivalent of

// We do not root during the migration - post genesis rooting is handled by votor
migration_status.should_report_commitment_or_root(*root)
});

});
m.stop();
voting_us += m.as_us();

Expand All @@ -2129,6 +2206,19 @@ fn load_frozen_forks(
all_banks.retain(|_, bank| bank.ancestors.contains_key(&root));
m.stop();
root_retain_us += m.as_us();

// If this root bank activated the feature flag, update migration status
if migration_status.is_pre_feature_activation() {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

startup equivalent of

// Check if we've rooted a bank that will tell us the migration slot
if migration_status.is_pre_feature_activation() {

if let Some(slot) = bank_forks
.read()
.unwrap()
.root_bank()
.feature_set
.activated_slot(&agave_feature_set::alpenglow::id())
{
migration_status.record_feature_activation(slot);
}
}
}

slots_processed += 1;
Expand Down
34 changes: 29 additions & 5 deletions local-cluster/tests/local_cluster.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6223,9 +6223,8 @@ fn test_alpenglow_imbalanced_stakes_catchup() {
);
}

fn test_alpenglow_migration(num_nodes: usize) {
fn test_alpenglow_migration(num_nodes: usize, test_name: &str) -> LocalCluster {
solana_logger::setup_with_default(AG_DEBUG_LOG_FILTER);
let test_name = &format!("test_alpenglow_migration_{num_nodes}");

let vote_listener_socket = solana_net_utils::bind_to_localhost().unwrap();
let vote_listener_addr = vote_listener_socket.try_clone().unwrap();
Expand Down Expand Up @@ -6341,24 +6340,49 @@ fn test_alpenglow_migration(num_nodes: usize) {

// Additionally ensure that roots are being made
cluster.check_for_new_roots(8, test_name, SocketAddrSpace::Unspecified);
cluster
}

#[test]
#[serial]
fn test_alpenglow_migration_1() {
test_alpenglow_migration(1)
test_alpenglow_migration(1, "test_alpenglow_migration_1");
}

#[test]
#[serial]
fn test_alpenglow_migration_2() {
test_alpenglow_migration(2)
test_alpenglow_migration(2, "test_alpenglow_migration_2");
}

#[test]
#[serial]
fn test_alpenglow_migration_4() {
test_alpenglow_migration(4)
test_alpenglow_migration(4, "test_alpenglow_migration_4");
}

/// End-to-end check that a validator restarted after the Alpenglow migration
/// replays the migration correctly from ledger genesis and rejoins the cluster.
#[test]
#[serial]
fn test_alpenglow_restart_post_migration() {
let test_name = "test_alpenglow_restart_post_migration";

// Start a 2 node cluster and have it go through the migration
let mut cluster = test_alpenglow_migration(2, test_name);

// Now restart one of the nodes (with a clone of its original config).
// This causes the cluster to temporarily halt
let node_pubkey = cluster.get_node_pubkeys()[0];
cluster.exit_restart_node(
&node_pubkey,
safe_clone_config(&cluster.validators.get(&node_pubkey).unwrap().config),
SocketAddrSpace::Unspecified,
);

// The restarted node will startup from genesis (0) so this test verifies the following:
// - When processing the feature flag activation during startup increment `PreFeatureActivation` -> `Migration`
// - When processing the first alpenglow block during startup increment `Migration` -> `ReadyToEnable`
// - If we reach `ReadyToEnable` during startup, enable alpenglow
// - Ensure that during startup we set ticks correctly
cluster.check_for_new_roots(8, test_name, SocketAddrSpace::Unspecified);
}

fn broadcast_vote(
Expand Down
12 changes: 9 additions & 3 deletions runtime/src/bank.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2826,16 +2826,22 @@ impl Bank {
}

/// If this is an alpenglow block, return the genesis certificate.
///
/// Note: this should only be called on a frozen bank, otherwise results
/// might be inaccurate for the first alpenglow bank.
pub fn get_alpenglow_genesis_certificate(&self) -> Option<Certificate> {
// No account at the well-known address means this is not an alpenglow block.
let account = self.get_account(&GENESIS_CERTIFICATE_ACCOUNT)?;
// The account is only ever written with a serialized certificate, so a
// deserialization failure indicates a bug rather than bad input.
let certificate = account
.deserialize_data()
.expect("Programmer error deserializing genesis certificate");
Some(certificate)
}

/// For use in the first Alpenglow block, set the genesis certificate.
pub fn set_alpenglow_genesis_certificate(&self, cert: &Certificate) {
// Size the account to the serialized certificate and fund it to be
// rent exempt for that size.
let serialized_len = bincode::serialized_size(cert).unwrap() as usize;
let rent_exempt_lamports = Rent::default().minimum_balance(serialized_len);
let account =
AccountSharedData::new_data(rent_exempt_lamports, cert, &system_program::ID).unwrap();

self.store_account_and_update_capitalization(&GENESIS_CERTIFICATE_ACCOUNT, &account);
}

pub fn confirmed_last_blockhash(&self) -> Hash {
const NUM_BLOCKHASH_CONFIRMATIONS: usize = 3;

Expand Down
Loading