diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index 95239bd64051..05c8bc4c7c3d 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -35,6 +35,7 @@ import ( "github.com/ethereum/go-ethereum/core/state" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/eth/ethconfig" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/internal/era" "github.com/ethereum/go-ethereum/log" @@ -189,6 +190,18 @@ It's deprecated, please use "geth db import" instead. This command dumps out the state for a given block (or latest, if none provided). `, } + + pruneCommand = &cli.Command{ + Action: pruneHistory, + Name: "prune-history", + Usage: "Prune blockchain history (block bodies and receipts) up to the merge block", + ArgsUsage: "", + Flags: utils.DatabaseFlags, + Description: ` +The prune-history command removes historical block bodies and receipts from the +blockchain database up to the merge block, while preserving block headers. This +helps reduce storage requirements for nodes that don't need full historical data.`, + } ) // initGenesis will initialise the given JSON format genesis file and writes it as @@ -598,3 +611,51 @@ func hashish(x string) bool { _, err := strconv.Atoi(x) return err != nil } + +func pruneHistory(ctx *cli.Context) error { + stack, _ := makeConfigNode(ctx) + defer stack.Close() + + // Open the chain database + chain, chaindb := utils.MakeChain(ctx, stack, false) + defer chaindb.Close() + defer chain.Stop() + + // Determine the prune point. This will be the first PoS block. + prunePoint, ok := ethconfig.HistoryPrunePoints[chain.Genesis().Hash()] + if !ok || prunePoint == nil { + return errors.New("prune point not found") + } + var ( + mergeBlock = prunePoint.BlockNumber + mergeBlockHash = prunePoint.BlockHash.Hex() + ) + + // Check we're far enough past merge to ensure all data is in freezer + currentHeader := chain.CurrentHeader() + if currentHeader == nil { + return errors.New("current header not found") + } + if currentHeader.Number.Uint64() < mergeBlock+params.FullImmutabilityThreshold { + return fmt.Errorf("chain not far enough past merge block, need %d more blocks", + mergeBlock+params.FullImmutabilityThreshold-currentHeader.Number.Uint64()) + } + + // Double-check the prune block in db has the expected hash. + hash := rawdb.ReadCanonicalHash(chaindb, mergeBlock) + if hash != common.HexToHash(mergeBlockHash) { + return fmt.Errorf("merge block hash mismatch: got %s, want %s", hash.Hex(), mergeBlockHash) + } + + log.Info("Starting history pruning", "head", currentHeader.Number, "tail", mergeBlock, "tailHash", mergeBlockHash) + start := time.Now() + rawdb.PruneTransactionIndex(chaindb, mergeBlock) + if _, err := chaindb.TruncateTail(mergeBlock); err != nil { + return fmt.Errorf("failed to truncate ancient data: %v", err) + } + log.Info("History pruning completed", "tail", mergeBlock, "elapsed", common.PrettyDuration(time.Since(start))) + + // TODO(s1na): what if there is a crash between the two prune operations? + + return nil +} diff --git a/cmd/geth/main.go b/cmd/geth/main.go index 07fbeaca5c18..9c0c0d9dfcc0 100644 --- a/cmd/geth/main.go +++ b/cmd/geth/main.go @@ -226,6 +226,7 @@ func init() { removedbCommand, dumpCommand, dumpGenesisCommand, + pruneCommand, // See accountcmd.go: accountCommand, walletCommand, diff --git a/core/blockchain.go b/core/blockchain.go index 2bf7fba427f9..d80236c90229 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -332,7 +332,8 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, genesis *Genesis bc.prefetcher = newStatePrefetcher(chainConfig, bc.hc) bc.processor = NewStateProcessor(chainConfig, bc.hc) - bc.genesisBlock = bc.GetBlockByNumber(0) + genesisHeader := bc.GetHeaderByNumber(0) + bc.genesisBlock = types.NewBlockWithHeader(genesisHeader) if bc.genesisBlock == nil { return nil, ErrNoGenesis } diff --git a/core/rawdb/accessors_indexes.go b/core/rawdb/accessors_indexes.go index 342aedd8dc3b..7bb96b1fa186 100644 --- a/core/rawdb/accessors_indexes.go +++ b/core/rawdb/accessors_indexes.go @@ -103,13 +103,14 @@ func DeleteTxLookupEntries(db ethdb.KeyValueWriter, hashes []common.Hash) { // DeleteAllTxLookupEntries purges all the transaction indexes in the database. // If condition is specified, only the entry with condition as True will be // removed; If condition is not specified, the entry is deleted. -func DeleteAllTxLookupEntries(db ethdb.KeyValueStore, condition func([]byte) bool) { +func DeleteAllTxLookupEntries(db ethdb.KeyValueStore, condition func(common.Hash, []byte) bool) { iter := NewKeyLengthIterator(db.NewIterator(txLookupPrefix, nil), common.HashLength+len(txLookupPrefix)) defer iter.Release() batch := db.NewBatch() for iter.Next() { - if condition == nil || condition(iter.Value()) { + txhash := common.Hash(iter.Key()[1:]) + if condition == nil || condition(txhash, iter.Value()) { batch.Delete(iter.Key()) } if batch.ValueSize() >= ethdb.IdealBatchSize { diff --git a/core/rawdb/chain_iterator.go b/core/rawdb/chain_iterator.go index 759e5913d13f..ecbc44e1f1ee 100644 --- a/core/rawdb/chain_iterator.go +++ b/core/rawdb/chain_iterator.go @@ -17,6 +17,7 @@ package rawdb import ( + "encoding/binary" "runtime" "sync/atomic" "time" @@ -361,3 +362,38 @@ func UnindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt ch func unindexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) { unindexTransactions(db, from, to, interrupt, hook, false) } + +// PruneTransactionIndex removes all tx index entries below a certain block number. +func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) { + tail := ReadTxIndexTail(db) + if tail == nil || *tail > pruneBlock { + return // no index, or index ends above pruneBlock + } + // There are blocks below pruneBlock in the index. Iterate the entire index to remove + // their entries. Note if this fails, the index is messed up, but tail still points to + // the old tail. + var count, removed int + DeleteAllTxLookupEntries(db, func(txhash common.Hash, v []byte) bool { + count++ + if count%10000000 == 0 { + log.Info("Pruning tx index", "count", count, "removed", removed) + } + if len(v) > 8 { + log.Error("Skipping legacy tx index entry", "hash", txhash) + return false + } + bn := decodeNumber(v) + if bn < pruneBlock { + removed++ + return true + } + return false + }) + WriteTxIndexTail(db, pruneBlock) +} + +func decodeNumber(b []byte) uint64 { + var numBuffer [8]byte + copy(numBuffer[8-len(b):], b) + return binary.BigEndian.Uint64(numBuffer[:]) +} diff --git a/core/rawdb/chain_iterator_test.go b/core/rawdb/chain_iterator_test.go index 390424f673fc..75bd5a9a942c 100644 --- a/core/rawdb/chain_iterator_test.go +++ b/core/rawdb/chain_iterator_test.go @@ -25,6 +25,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/ethdb" ) func TestChainIterator(t *testing.T) { @@ -102,19 +103,18 @@ func TestChainIterator(t *testing.T) { } } -func TestIndexTransactions(t *testing.T) { - // Construct test chain db - chainDb := NewMemoryDatabase() - - var block *types.Block +func initDatabaseWithTransactions(db ethdb.Database) ([]*types.Block, []*types.Transaction) { + var blocks []*types.Block var txs []*types.Transaction to := common.BytesToAddress([]byte{0x11}) // Write empty genesis block - block = types.NewBlock(&types.Header{Number: big.NewInt(int64(0))}, nil, nil, newTestHasher()) - WriteBlock(chainDb, block) - WriteCanonicalHash(chainDb, block.Hash(), block.NumberU64()) + block := types.NewBlock(&types.Header{Number: big.NewInt(int64(0))}, nil, nil, newTestHasher()) + WriteBlock(db, block) + WriteCanonicalHash(db, block.Hash(), block.NumberU64()) + blocks = append(blocks, block) + // Create transactions. for i := uint64(1); i <= 10; i++ { var tx *types.Transaction if i%2 == 0 { @@ -138,10 +138,21 @@ func TestIndexTransactions(t *testing.T) { }) } txs = append(txs, tx) - block = types.NewBlock(&types.Header{Number: big.NewInt(int64(i))}, &types.Body{Transactions: types.Transactions{tx}}, nil, newTestHasher()) - WriteBlock(chainDb, block) - WriteCanonicalHash(chainDb, block.Hash(), block.NumberU64()) + block := types.NewBlock(&types.Header{Number: big.NewInt(int64(i))}, &types.Body{Transactions: types.Transactions{tx}}, nil, newTestHasher()) + WriteBlock(db, block) + WriteCanonicalHash(db, block.Hash(), block.NumberU64()) + blocks = append(blocks, block) } + + return blocks, txs +} + +func TestIndexTransactions(t *testing.T) { + // Construct test chain db + chainDB := NewMemoryDatabase() + + _, txs := initDatabaseWithTransactions(chainDB) + // verify checks whether the tx indices in the range [from, to) // is expected. verify := func(from, to int, exist bool, tail uint64) { @@ -149,7 +160,7 @@ func TestIndexTransactions(t *testing.T) { if i == 0 { continue } - number := ReadTxLookupEntry(chainDb, txs[i-1].Hash()) + number := ReadTxLookupEntry(chainDB, txs[i-1].Hash()) if exist && number == nil { t.Fatalf("Transaction index %d missing", i) } @@ -157,29 +168,29 @@ func TestIndexTransactions(t *testing.T) { t.Fatalf("Transaction index %d is not deleted", i) } } - number := ReadTxIndexTail(chainDb) + number := ReadTxIndexTail(chainDB) if number == nil || *number != tail { t.Fatalf("Transaction tail mismatch") } } - IndexTransactions(chainDb, 5, 11, nil, false) + IndexTransactions(chainDB, 5, 11, nil, false) verify(5, 11, true, 5) verify(0, 5, false, 5) - IndexTransactions(chainDb, 0, 5, nil, false) + IndexTransactions(chainDB, 0, 5, nil, false) verify(0, 11, true, 0) - UnindexTransactions(chainDb, 0, 5, nil, false) + UnindexTransactions(chainDB, 0, 5, nil, false) verify(5, 11, true, 5) verify(0, 5, false, 5) - UnindexTransactions(chainDb, 5, 11, nil, false) + UnindexTransactions(chainDB, 5, 11, nil, false) verify(0, 11, false, 11) // Testing corner cases signal := make(chan struct{}) var once sync.Once - indexTransactionsForTesting(chainDb, 5, 11, signal, func(n uint64) bool { + indexTransactionsForTesting(chainDB, 5, 11, signal, func(n uint64) bool { if n <= 8 { once.Do(func() { close(signal) @@ -190,11 +201,11 @@ func TestIndexTransactions(t *testing.T) { }) verify(9, 11, true, 9) verify(0, 9, false, 9) - IndexTransactions(chainDb, 0, 9, nil, false) + IndexTransactions(chainDB, 0, 9, nil, false) signal = make(chan struct{}) var once2 sync.Once - unindexTransactionsForTesting(chainDb, 0, 11, signal, func(n uint64) bool { + unindexTransactionsForTesting(chainDB, 0, 11, signal, func(n uint64) bool { if n >= 8 { once2.Do(func() { close(signal) @@ -206,3 +217,37 @@ func TestIndexTransactions(t *testing.T) { verify(8, 11, true, 8) verify(0, 8, false, 8) } + +func TestPruneTransactionIndex(t *testing.T) { + chainDB := NewMemoryDatabase() + blocks, _ := initDatabaseWithTransactions(chainDB) + lastBlock := blocks[len(blocks)-1].NumberU64() + pruneBlock := lastBlock - 3 + + IndexTransactions(chainDB, 0, lastBlock+1, nil, false) + + // Check all transactions are in index. + for _, block := range blocks { + for _, tx := range block.Transactions() { + num := ReadTxLookupEntry(chainDB, tx.Hash()) + if num == nil || *num != block.NumberU64() { + t.Fatalf("wrong TxLookup entry: %x -> %v", tx.Hash(), num) + } + } + } + + PruneTransactionIndex(chainDB, pruneBlock) + + // Check transactions from old blocks not included. + for _, block := range blocks { + for _, tx := range block.Transactions() { + num := ReadTxLookupEntry(chainDB, tx.Hash()) + if block.NumberU64() < pruneBlock && num != nil { + t.Fatalf("TxLookup entry not removed: %x -> %v", tx.Hash(), num) + } + if block.NumberU64() >= pruneBlock && (num == nil || *num != block.NumberU64()) { + t.Fatalf("wrong TxLookup entry after pruning: %x -> %v", tx.Hash(), num) + } + } + } +} diff --git a/core/txindexer.go b/core/txindexer.go index 29e87905d509..d0fce302f3eb 100644 --- a/core/txindexer.go +++ b/core/txindexer.go @@ -20,6 +20,7 @@ import ( "errors" "fmt" + "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" @@ -186,7 +187,7 @@ func (indexer *txIndexer) repair(head uint64) { // potentially leaving dangling indexes in the database. // However, this is considered acceptable. rawdb.WriteTxIndexTail(indexer.db, indexer.cutoff) - rawdb.DeleteAllTxLookupEntries(indexer.db, func(blob []byte) bool { + rawdb.DeleteAllTxLookupEntries(indexer.db, func(txhash common.Hash, blob []byte) bool { n := rawdb.DecodeTxLookupEntry(blob, indexer.db) return n != nil && *n < indexer.cutoff }) diff --git a/tests/testdata b/tests/testdata index 81862e484858..faf33b471465 160000 --- a/tests/testdata +++ b/tests/testdata @@ -1 +1 @@ -Subproject commit 81862e4848585a438d64f911a19b3825f0f4cd95 +Subproject commit faf33b471465d3c6cdc3d04fbd690895f78d33f2