diff --git a/ledger/src/blockstore.rs b/ledger/src/blockstore.rs index d256e80a5c..f3f3cb41a2 100644 --- a/ledger/src/blockstore.rs +++ b/ledger/src/blockstore.rs @@ -18,7 +18,11 @@ use { leader_schedule_cache::LeaderScheduleCache, next_slots_iterator::NextSlotsIterator, shred::{ - self, ErasureSetId, ProcessShredsStats, ReedSolomonCache, Shred, ShredData, ShredFlags, + self, + merkle_tree::{ + get_proof_size, make_merkle_proof, make_merkle_tree, SIZE_OF_MERKLE_PROOF_ENTRY, + }, + ErasureSetId, ProcessShredsStats, ReedSolomonCache, Shred, ShredData, ShredFlags, ShredId, ShredType, Shredder, DATA_SHREDS_PER_FEC_BLOCK, }, slot_stats::{ShredSource, SlotsStats}, @@ -49,6 +53,7 @@ use { solana_metrics::datapoint_error, solana_pubkey::Pubkey, solana_runtime::bank::Bank, + solana_sha256_hasher::hashv, solana_signature::Signature, solana_signer::Signer, solana_storage_proto::{StoredExtendedRewards, StoredTransactionStatusMeta}, @@ -275,6 +280,7 @@ pub struct Blockstore { alt_data_shred_cf: LedgerColumn, alt_merkle_root_meta_cf: LedgerColumn, parent_meta_cf: LedgerColumn, + double_merkle_meta_cf: LedgerColumn, highest_primary_index_slot: RwLock>, max_root: AtomicU64, @@ -458,6 +464,7 @@ impl Blockstore { let alt_data_shred_cf = db.column(); let alt_merkle_root_meta_cf = db.column(); let parent_meta_cf = db.column(); + let double_merkle_meta_cf = db.column(); // Get max root or 0 if it doesn't exist let max_root = roots_cf @@ -499,6 +506,7 @@ impl Blockstore { alt_data_shred_cf, alt_merkle_root_meta_cf, parent_meta_cf, + double_merkle_meta_cf, highest_primary_index_slot: RwLock::>::default(), new_shreds_signals: Mutex::default(), @@ -866,6 +874,128 @@ impl Blockstore { } } + /// Fetches (and populates if needed) the DoubleMerkleMeta for the given block. + /// Returns the double_merkle_root + /// + /// Should only be used on full blocks. 
+ pub fn get_or_compute_double_merkle_root( + &self, + slot: Slot, + block_location: BlockLocation, + ) -> std::result::Result { + if let Some(double_merkle_meta) = self + .double_merkle_meta_cf + .get((slot, block_location)) + .expect("Blockstore operations must succeed") + { + return Ok(double_merkle_meta.double_merkle_root); + } + + // Compute double merkle - slot must be full at this point + let Some(slot_meta) = self + .meta_cf + .get(slot) + .expect("Blockstore operations must succeed") + else { + return Err(BlockstoreProcessorError::FailedToLoadMeta); + }; + + if !slot_meta.is_full() { + return Err(BlockstoreProcessorError::SlotNotFull(slot, block_location)); + } + + let Some(last_index) = slot_meta.last_index else { + return Err(BlockstoreProcessorError::SlotNotFull(slot, block_location)); + }; + + // This function is only used post Alpenglow, so implicitly gated by SIMD-0317 as that is a prereq + let fec_set_count = (last_index / (DATA_SHREDS_PER_FEC_BLOCK as u64) + 1) as usize; + + let Some(parent_meta) = self + .parent_meta_cf + .get((slot, block_location)) + .expect("Blockstore operations must succeed") + else { + return Err(BlockstoreProcessorError::MissingParent( + slot, + block_location, + )); + }; + + // Collect merkle roots for each FEC set + let mut merkle_tree_leaves = Vec::with_capacity(fec_set_count + 1); + + for i in 0..fec_set_count { + let fec_set_index = (i * DATA_SHREDS_PER_FEC_BLOCK) as u32; + let erasure_set_id = ErasureSetId::new(slot, fec_set_index); + + let Some(merkle_root) = self + .merkle_root_meta_from_location(erasure_set_id, block_location) + .expect("Blockstore operations must succeed") + .and_then(|mrm| mrm.merkle_root()) + else { + return Err(BlockstoreProcessorError::MissingMerkleRoot( + slot, + fec_set_index as u64, + )); + }; + merkle_tree_leaves.push(Ok(merkle_root)); + } + + // Add parent info as the last leaf + let parent_info_hash = hashv(&[ + &parent_meta.parent_slot.to_le_bytes(), + 
parent_meta.parent_block_id.as_ref(), + ]); + merkle_tree_leaves.push(Ok(parent_info_hash)); + + // Build the merkle tree + let merkle_tree = make_merkle_tree(merkle_tree_leaves).map_err(|_| { + BlockstoreProcessorError::FailedDoubleMerkleRootConstruction(slot, block_location) + })?; + let double_merkle_root = *merkle_tree + .last() + .expect("Merkle tree cannot be empty as fec_set_count is > 0"); + + // Build proofs + let tree_size = fec_set_count + 1; + let mut proofs = Vec::with_capacity(tree_size); + + for leaf_index in 0..tree_size { + let proof_iter = make_merkle_proof(leaf_index, tree_size, &merkle_tree); + let proof: Vec = proof_iter + .map(|proof| proof.map(|p| p.as_slice())) + .collect::, _>>() + .map_err(|_| { + BlockstoreProcessorError::FailedDoubleMerkleRootConstruction( + slot, + block_location, + ) + })? + .into_iter() + .flatten() + .copied() + .collect(); + debug_assert!( + proof.len() == get_proof_size(tree_size) as usize * SIZE_OF_MERKLE_PROOF_ENTRY + ); + proofs.push(proof); + } + + // Create and store DoubleMerkleMeta + let double_merkle_meta = DoubleMerkleMeta { + double_merkle_root, + fec_set_count, + proofs, + }; + + self.double_merkle_meta_cf + .put((slot, block_location), &double_merkle_meta) + .expect("Blockstore operations must succeed"); + + Ok(double_merkle_root) + } + /// Check whether the specified slot is an orphan slot which does not /// have a parent slot. 
/// @@ -6106,7 +6236,11 @@ pub mod tests { crate::{ genesis_utils::{create_genesis_config, GenesisConfigInfo}, leader_schedule::{FixedSchedule, IdentityKeyedLeaderSchedule}, - shred::{max_ticks_per_n_shreds, MAX_DATA_SHREDS_PER_SLOT}, + shred::{ + max_ticks_per_n_shreds, + merkle_tree::{get_merkle_root, MerkleProofEntry}, + MAX_DATA_SHREDS_PER_SLOT, + }, }, assert_matches::assert_matches, bincode::{serialize, Options}, @@ -12783,4 +12917,122 @@ pub mod tests { Err(TransactionError::InsufficientFundsForFee) ); } + + #[test] + fn test_get_or_compute_double_merkle_root() { + let ledger_path = get_tmp_ledger_path_auto_delete!(); + let blockstore = Blockstore::open(ledger_path.path()).unwrap(); + + let parent_slot = 990; + let slot = 1000; + let num_entries = 200; + + // Create a set of shreds for a complete block + let (data_shreds, coding_shreds, leader_schedule) = + setup_erasure_shreds(slot, parent_slot, num_entries); + + // Create ParentMeta + let parent_meta = ParentMeta { + parent_slot, + parent_block_id: Hash::default(), + replay_fec_set_index: 0, + }; + blockstore + .parent_meta_cf + .put((slot, BlockLocation::Original), &parent_meta) + .unwrap(); + + // Insert shreds into blockstore + let mut fec_set_roots = [Hash::default(); 3]; + for shred in data_shreds.iter().chain(coding_shreds.iter()) { + if shred.is_data() && shred.index() % (DATA_SHREDS_PER_FEC_BLOCK as u32) == 0 { + // store fec set merkle roots for later + fec_set_roots[(shred.index() as usize) / DATA_SHREDS_PER_FEC_BLOCK] = + shred.merkle_root().unwrap(); + } + let duplicates = + blockstore.insert_shred_return_duplicate(shred.clone(), &leader_schedule); + assert!(duplicates.is_empty()); + } + + let slot_meta = blockstore.meta(slot).unwrap().unwrap(); + assert!(slot_meta.is_full()); + + // Test getting the double merkle root + let block_location = BlockLocation::Original; + let double_merkle_root = blockstore + .get_or_compute_double_merkle_root(slot, block_location) + .unwrap(); + + let 
double_merkle_meta = blockstore + .double_merkle_meta_cf + .get((slot, block_location)) + .unwrap() + .unwrap(); + + // Verify meta + assert_eq!(double_merkle_meta.double_merkle_root, double_merkle_root); + assert_eq!(double_merkle_meta.fec_set_count, 3); // With 200 entries, we should have 3 FEC sets + assert_eq!(double_merkle_meta.proofs.len(), 4); // 3 FEC sets, 1 parent info + + // Verify the proofs + let proof_size = get_proof_size(double_merkle_meta.fec_set_count + 1) as usize; + + // Fec sets + for (fec_set, root) in fec_set_roots.iter().enumerate() { + let proof = &double_merkle_meta.proofs[fec_set]; + let proof = proof + .chunks(SIZE_OF_MERKLE_PROOF_ENTRY) + .map(<&MerkleProofEntry>::try_from) + .map(std::result::Result::unwrap); + assert_eq!(proof_size, proof.clone().count()); + + let double_merkle_root = get_merkle_root(fec_set, *root, proof).unwrap(); + assert_eq!(double_merkle_meta.double_merkle_root, double_merkle_root); + } + + // Parent info - final proof + let parent_info_hash = hashv(&[ + &parent_slot.to_le_bytes(), + parent_meta.parent_block_id.as_ref(), + ]); + let parent_info_proof = &double_merkle_meta.proofs[double_merkle_meta.fec_set_count]; + let proof = parent_info_proof + .chunks(SIZE_OF_MERKLE_PROOF_ENTRY) + .map(<&MerkleProofEntry>::try_from) + .map(std::result::Result::unwrap); + assert_eq!(proof_size, proof.clone().count()); + + let double_merkle_root = + get_merkle_root(double_merkle_meta.fec_set_count, parent_info_hash, proof).unwrap(); + assert_eq!(double_merkle_meta.double_merkle_root, double_merkle_root); + + // Slot not full should fail + let incomplete_slot = 1001; // Make it a child of slot 1000 + let (partial_shreds, _, leader_schedule) = + setup_erasure_shreds_with_index_and_chained_merkle_and_last_in_slot( + incomplete_slot, + slot, // parent is 1000 + 5, + 0, + Some(Hash::new_from_array(rand::thread_rng().gen())), + false, // not last in slot + ); + + for shred in partial_shreds.iter().take(3) { + let duplicates = + 
blockstore.insert_shred_return_duplicate(shred.clone(), &leader_schedule); + assert!(duplicates.is_empty()); + } + + let result = blockstore.get_or_compute_double_merkle_root(incomplete_slot, block_location); + match result { + Err(BlockstoreProcessorError::SlotNotFull(slot, loc)) => { + assert_eq!(slot, incomplete_slot); + assert_eq!(loc, block_location); + } // This is the expected error + Err(e) => panic!("Unexpected error: {e:?}"), + Ok(_) => panic!("Expected error but got Ok"), + } + } } diff --git a/ledger/src/blockstore/blockstore_purge.rs b/ledger/src/blockstore/blockstore_purge.rs index c61b53bfb0..472910ee0e 100644 --- a/ledger/src/blockstore/blockstore_purge.rs +++ b/ledger/src/blockstore/blockstore_purge.rs @@ -332,6 +332,10 @@ impl Blockstore { & self .parent_meta_cf .delete_range_in_batch(write_batch, from_slot, to_slot) + .is_ok() + & self + .double_merkle_meta_cf + .delete_range_in_batch(write_batch, from_slot, to_slot) .is_ok(); match purge_type { @@ -437,6 +441,14 @@ impl Blockstore { .alt_merkle_root_meta_cf .delete_file_in_range(from_slot, to_slot) .is_ok() + & self + .parent_meta_cf + .delete_file_in_range(from_slot, to_slot) + .is_ok() + & self + .double_merkle_meta_cf + .delete_file_in_range(from_slot, to_slot) + .is_ok() } /// Returns true if the special columns, TransactionStatus and diff --git a/ledger/src/blockstore/column.rs b/ledger/src/blockstore/column.rs index ae7fa21a5e..84c6531dd2 100644 --- a/ledger/src/blockstore/column.rs +++ b/ledger/src/blockstore/column.rs @@ -288,6 +288,16 @@ pub mod columns { /// * index type: `(Slot, BlockLocation)` /// * value type: [`blockstore_meta::ParentMeta`] pub struct ParentMeta; + + #[derive(Debug)] + /// The double merkle root metadata column + /// + /// This column stores details about the double merkle root of a block. + /// We update this column when we finish ingesting all the shreds of the block. 
+ /// + /// * index type: `(Slot, BlockLocation)` + /// * value type: [`blockstore_meta::DoubleMerkleMeta`] + pub struct DoubleMerkleMeta; } macro_rules! convert_column_index_to_key_bytes { @@ -1118,3 +1128,52 @@ impl ColumnName for columns::ParentMeta { impl TypedColumn for columns::ParentMeta { type Type = blockstore_meta::ParentMeta; } + +impl Column for columns::DoubleMerkleMeta { + type Index = (Slot, BlockLocation); + // Key size: Slot (8 bytes) + Hash (32 bytes) + // When BlockLocation::Original, the hash is Hash::default(). + type Key = [u8; std::mem::size_of::() + HASH_BYTES]; + + #[inline] + fn key((slot, location): &Self::Index) -> Self::Key { + let mut key = [0u8; std::mem::size_of::() + HASH_BYTES]; + key[..8].copy_from_slice(&slot.to_le_bytes()); + + let hash_bytes = match location { + BlockLocation::Original => &Hash::default().to_bytes(), + BlockLocation::Alternate { block_id } => &block_id.to_bytes(), + }; + + key[8..40].copy_from_slice(hash_bytes); + + key + } + + fn index(key: &[u8]) -> Self::Index { + let slot = Slot::from_le_bytes(key[0..8].try_into().unwrap()); + let hash = Hash::new_from_array(key[8..40].try_into().unwrap()); + let location = match hash == Hash::default() { + true => BlockLocation::Original, + false => BlockLocation::Alternate { block_id: hash }, + }; + + (slot, location) + } + + fn as_index(slot: Slot) -> Self::Index { + (slot, BlockLocation::Original) + } + + fn slot((slot, _location): Self::Index) -> Slot { + slot + } +} + +impl ColumnName for columns::DoubleMerkleMeta { + const NAME: &'static str = "double_merkle_meta"; +} + +impl TypedColumn for columns::DoubleMerkleMeta { + type Type = blockstore_meta::DoubleMerkleMeta; +} diff --git a/ledger/src/blockstore_db.rs b/ledger/src/blockstore_db.rs index 4dbb47e27d..51096e73a1 100644 --- a/ledger/src/blockstore_db.rs +++ b/ledger/src/blockstore_db.rs @@ -201,6 +201,7 @@ impl Rocks { new_cf_descriptor::(options, oldest_slot), new_cf_descriptor::(options, oldest_slot), 
new_cf_descriptor::(options, oldest_slot), + new_cf_descriptor::(options, oldest_slot), ]; // If the access type is Secondary, we don't need to open all of the @@ -249,7 +250,7 @@ impl Rocks { cf_descriptors } - const fn columns() -> [&'static str; 27] { + const fn columns() -> [&'static str; 28] { [ columns::ErasureMeta::NAME, columns::DeadSlots::NAME, @@ -278,6 +279,7 @@ impl Rocks { columns::AlternateShredData::NAME, columns::AlternateMerkleRootMeta::NAME, columns::ParentMeta::NAME, + columns::DoubleMerkleMeta::NAME, ] } diff --git a/ledger/src/blockstore_meta.rs b/ledger/src/blockstore_meta.rs index 4a5a2dd7ba..e0a7f95c9a 100644 --- a/ledger/src/blockstore_meta.rs +++ b/ledger/src/blockstore_meta.rs @@ -11,6 +11,7 @@ use { solana_hash::Hash, std::{ collections::BTreeSet, + fmt::Display, ops::{Range, RangeBounds}, }, }; @@ -464,6 +465,15 @@ impl BlockVersions { } } +impl Display for BlockLocation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + BlockLocation::Original => write!(f, "Original"), + BlockLocation::Alternate { block_id } => write!(f, "Alternate({block_id})"), + } + } +} + #[derive(Deserialize, Serialize, Debug, PartialEq, Eq)] pub enum FrozenHashVersioned { Current(FrozenHashStatus), @@ -1000,6 +1010,24 @@ impl ParentMeta { } } +#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)] +pub struct DoubleMerkleMeta { + /// The double merkle root computed as the root of the merkle tree + /// containing the merkle roots of each fec set + the parent info (parent_slot, parent_double_merkle_root) + pub double_merkle_root: Hash, + + /// The number of fec sets in this block + pub fec_set_count: usize, + + /// The merkle proofs + /// index of [0, fec_set_count) corresponds to the proofs for each fec set leaf node + /// index of fec_set_count corresponds to the proof for the parent info node + /// + /// The size of this vec is `fec_set_count + 1` + /// Each inner Vec contains concatenated proof entries for 
that leaf + pub proofs: Vec>, +} + #[cfg(test)] mod test { use { diff --git a/ledger/src/blockstore_processor.rs b/ledger/src/blockstore_processor.rs index 2b459c08b4..f444759b0b 100644 --- a/ledger/src/blockstore_processor.rs +++ b/ledger/src/blockstore_processor.rs @@ -2,7 +2,7 @@ use { crate::{ block_error::BlockError, blockstore::{Blockstore, BlockstoreError}, - blockstore_meta::SlotMeta, + blockstore_meta::{BlockLocation, SlotMeta}, entry_notifier_service::{EntryNotification, EntryNotifierSender}, leader_schedule_cache::LeaderScheduleCache, transaction_balances::compile_collected_balances, @@ -845,6 +845,18 @@ pub enum BlockstoreProcessorError { #[error("block component processor error: {0}")] BlockComponentProcessor(#[from] BlockComponentProcessorError), + + #[error("slot {0} at location {1} not full")] + SlotNotFull(Slot, BlockLocation), + + #[error("slot {0} at location {1} missing parent")] + MissingParent(Slot, BlockLocation), + + #[error("missing merkle root for slot {0}, index {1}")] + MissingMerkleRoot(Slot, u64), + + #[error("double merkle root construction failure for slot {0} at location {1}")] + FailedDoubleMerkleRootConstruction(Slot, BlockLocation), } /// Callback for accessing bank state after each slot is confirmed while diff --git a/ledger/src/shred.rs b/ledger/src/shred.rs index a0e1ffa9c8..495507d736 100644 --- a/ledger/src/shred.rs +++ b/ledger/src/shred.rs @@ -85,7 +85,7 @@ use {solana_keypair::Keypair, solana_perf::packet::Packet, solana_signer::Signer mod common; pub(crate) mod merkle; -mod merkle_tree; +pub(crate) mod merkle_tree; mod payload; mod shred_code; mod shred_data; diff --git a/ledger/src/shred/merkle_tree.rs b/ledger/src/shred/merkle_tree.rs index d9dcd46313..6bdf020a05 100644 --- a/ledger/src/shred/merkle_tree.rs +++ b/ledger/src/shred/merkle_tree.rs @@ -108,7 +108,6 @@ pub fn get_merkle_tree_size(num_shreds: usize) -> usize { } // Maps number of (code + data) shreds to merkle_proof.len(). 
-#[cfg(test)] pub(crate) const fn get_proof_size(num_shreds: usize) -> u8 { let bits = usize::BITS - num_shreds.leading_zeros(); let proof_size = if num_shreds.is_power_of_two() {