From a48789a7bfd18227ac81086c64ddff6b70ac0e26 Mon Sep 17 00:00:00 2001 From: Sina Mahmoodi Date: Thu, 19 Dec 2024 09:10:35 +0100 Subject: [PATCH 01/22] cmd/geth: add prune history command --- cmd/geth/chaincmd.go | 83 ++++++++++++++++++++++++++++++++++++++++++ cmd/geth/main.go | 1 + core/rawdb/database.go | 25 +++++++++---- 3 files changed, 101 insertions(+), 8 deletions(-) diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index 95239bd64051..ab00f2aac042 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -35,6 +35,7 @@ import ( "github.com/ethereum/go-ethereum/core/state" "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/eth/ethconfig" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/internal/era" "github.com/ethereum/go-ethereum/log" @@ -189,6 +190,18 @@ It's deprecated, please use "geth db import" instead. This command dumps out the state for a given block (or latest, if none provided). `, } + + pruneCommand = &cli.Command{ + Action: pruneHistory, + Name: "prune-history", + Usage: "Prune blockchain history (block bodies and receipts) up to the merge block", + ArgsUsage: "", + Flags: utils.DatabaseFlags, + Description: ` +The prune-history command removes historical block bodies and receipts from the +blockchain database up to the merge block, while preserving block headers. This +helps reduce storage requirements for nodes that don't need full historical data.`, + } ) // initGenesis will initialise the given JSON format genesis file and writes it as @@ -598,3 +611,73 @@ func hashish(x string) bool { _, err := strconv.Atoi(x) return err != nil } + +func pruneHistory(ctx *cli.Context) error { + stack, _ := makeConfigNode(ctx) + defer stack.Close() + + // Open the chain database + chain, chaindb := utils.MakeChain(ctx, stack, false) + defer chaindb.Close() + defer chain.Stop() + + // Pruning only supported for mainnet and sepolia. + if chain.Config().ChainID.Cmp(params.MainnetChainConfig.ChainID) != 0 && chain.Config().ChainID.Cmp(params.SepoliaChainConfig.ChainID) != 0 { + log.Info("Chain pruning not supported for this network") + return nil + } + + // Determine the prune point. This will be the first PoS block. + prunePoint, ok := ethconfig.HistoryPrunePoints[chain.Genesis().Hash()] + if !ok || prunePoint == nil { + return errors.New("prune point not found") + } + var ( + mergeBlock = prunePoint.BlockNumber + mergeBlockHash = prunePoint.BlockHash.Hex() + ) + + // Check we're far enough past merge to ensure all data is in freezer + currentHeader := chain.CurrentHeader() + if currentHeader == nil { + return errors.New("current header not found") + } + if currentHeader.Number.Uint64() < mergeBlock+params.FullImmutabilityThreshold { + return fmt.Errorf("chain not far enough past merge block, need %d more blocks", + mergeBlock+params.FullImmutabilityThreshold-currentHeader.Number.Uint64()) + } + + // Double-check the prune block in db has the expected hash. + hash := rawdb.ReadCanonicalHash(chaindb, mergeBlock) + if hash != common.HexToHash(mergeBlockHash) { + return fmt.Errorf("merge block hash mismatch: got %s, want %s", hash.Hex(), mergeBlockHash) + } + + txlookupTail := rawdb.ReadTxIndexTail(chaindb) + + log.Info("Starting chain pruning", + "currentHeight", currentHeader.Number, + "mergeBlock", mergeBlock, + "mergeBlockHash", mergeBlockHash) + + start := time.Now() + + // First prune the transaction lookup index as + // it requires the block bodies. + if txlookupTail != nil && *txlookupTail < mergeBlock { + rawdb.UnindexTransactions(chaindb, *txlookupTail, mergeBlock, nil, false) + } + + // TODO(s1na): what if there is a crash between the two prune operations? + + // Truncate everything up to merge block + if _, err := chaindb.TruncateTail(mergeBlock); err != nil { + return fmt.Errorf("failed to truncate ancient data: %v", err) + } + + log.Info("Chain pruning completed", + "prunedUpTo", mergeBlock, + "elapsed", common.PrettyDuration(time.Since(start))) + + return nil +} diff --git a/cmd/geth/main.go b/cmd/geth/main.go index 07fbeaca5c18..9c0c0d9dfcc0 100644 --- a/cmd/geth/main.go +++ b/cmd/geth/main.go @@ -226,6 +226,7 @@ func init() { removedbCommand, dumpCommand, dumpGenesisCommand, + pruneCommand, // See accountcmd.go: accountCommand, walletCommand, diff --git a/core/rawdb/database.go b/core/rawdb/database.go index 4c87e66cfd65..c3bafdfe760e 100644 --- a/core/rawdb/database.go +++ b/core/rawdb/database.go @@ -222,16 +222,25 @@ func NewDatabaseWithFreezer(db ethdb.KeyValueStore, ancient string, namespace st // it to the freezer content. if kvgenesis, _ := db.Get(headerHashKey(0)); len(kvgenesis) > 0 { if frozen, _ := frdb.Ancients(); frozen > 0 { - // If the freezer already contains something, ensure that the genesis blocks - // match, otherwise we might mix up freezers across chains and destroy both - // the freezer and the key-value store. - frgenesis, err := frdb.Ancient(ChainFreezerHashTable, 0) + tail, err := frdb.Tail() if err != nil { printChainMetadata(db) - return nil, fmt.Errorf("failed to retrieve genesis from ancient %v", err) - } else if !bytes.Equal(kvgenesis, frgenesis) { - printChainMetadata(db) - return nil, fmt.Errorf("genesis mismatch: %#x (leveldb) != %#x (ancients)", kvgenesis, frgenesis) + return nil, fmt.Errorf("failed to retrieve tail from freezer %v", err) + } + // If tail > 0, the history has been pruned and genesis block is not anymore in the freezer. + // TODO: we need another way to verify the network for kvdb and freezer match. + if tail == 0 { + // If the freezer already contains something, ensure that the genesis blocks + // match, otherwise we might mix up freezers across chains and destroy both + // the freezer and the key-value store. + frgenesis, err := frdb.Ancient(ChainFreezerHashTable, 0) + if err != nil { + printChainMetadata(db) + return nil, fmt.Errorf("failed to retrieve genesis from ancient %v", err) + } else if !bytes.Equal(kvgenesis, frgenesis) { + printChainMetadata(db) + return nil, fmt.Errorf("genesis mismatch: %#x (leveldb) != %#x (ancients)", kvgenesis, frgenesis) + } } // Key-value store and freezer belong to the same network. Ensure that they // are contiguous, otherwise we might end up with a non-functional freezer. From 0a916cfd1148566cebfa2c269c478498aa28bccb Mon Sep 17 00:00:00 2001 From: Sina Mahmoodi Date: Fri, 14 Mar 2025 10:10:20 +0100 Subject: [PATCH 02/22] revert testdata change --- tests/testdata | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testdata b/tests/testdata index 81862e484858..faf33b471465 160000 --- a/tests/testdata +++ b/tests/testdata @@ -1 +1 @@ -Subproject commit 81862e4848585a438d64f911a19b3825f0f4cd95 +Subproject commit faf33b471465d3c6cdc3d04fbd690895f78d33f2 From e933ed48a80cb70a82cfa43d5e3692e726e3ea94 Mon Sep 17 00:00:00 2001 From: Sina Mahmoodi Date: Fri, 14 Mar 2025 10:11:36 +0100 Subject: [PATCH 03/22] rm extra check --- cmd/geth/chaincmd.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index ab00f2aac042..a3e51796d0d3 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -621,12 +621,6 @@ func pruneHistory(ctx *cli.Context) error { defer chaindb.Close() defer chain.Stop() - // Pruning only supported for mainnet and sepolia. - if chain.Config().ChainID.Cmp(params.MainnetChainConfig.ChainID) != 0 && chain.Config().ChainID.Cmp(params.SepoliaChainConfig.ChainID) != 0 { - log.Info("Chain pruning not supported for this network") - return nil - } - // Determine the prune point. This will be the first PoS block. prunePoint, ok := ethconfig.HistoryPrunePoints[chain.Genesis().Hash()] if !ok || prunePoint == nil { From 6f8293d272317053ed43b1b48935f875b9f4cebd Mon Sep 17 00:00:00 2001 From: Sina Mahmoodi Date: Fri, 14 Mar 2025 14:57:42 +0100 Subject: [PATCH 04/22] update based on freeezr --- cmd/geth/chaincmd.go | 2 +- core/rawdb/database.go | 25 ++++++++----------------- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index a3e51796d0d3..e915e0e6cbcf 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -665,7 +665,7 @@ func pruneHistory(ctx *cli.Context) error { // TODO(s1na): what if there is a crash between the two prune operations? // Truncate everything up to merge block - if _, err := chaindb.TruncateTail(mergeBlock); err != nil { + if _, err := chaindb.TruncateTailBlocks(mergeBlock); err != nil { return fmt.Errorf("failed to truncate ancient data: %v", err) } diff --git a/core/rawdb/database.go b/core/rawdb/database.go index c3bafdfe760e..4c87e66cfd65 100644 --- a/core/rawdb/database.go +++ b/core/rawdb/database.go @@ -222,25 +222,16 @@ func NewDatabaseWithFreezer(db ethdb.KeyValueStore, ancient string, namespace st // it to the freezer content. if kvgenesis, _ := db.Get(headerHashKey(0)); len(kvgenesis) > 0 { if frozen, _ := frdb.Ancients(); frozen > 0 { - tail, err := frdb.Tail() + // If the freezer already contains something, ensure that the genesis blocks + // match, otherwise we might mix up freezers across chains and destroy both + // the freezer and the key-value store. + frgenesis, err := frdb.Ancient(ChainFreezerHashTable, 0) if err != nil { printChainMetadata(db) - return nil, fmt.Errorf("failed to retrieve tail from freezer %v", err) - } - // If tail > 0, the history has been pruned and genesis block is not anymore in the freezer. - // TODO: we need another way to verify the network for kvdb and freezer match. - if tail == 0 { - // If the freezer already contains something, ensure that the genesis blocks - // match, otherwise we might mix up freezers across chains and destroy both - // the freezer and the key-value store. - frgenesis, err := frdb.Ancient(ChainFreezerHashTable, 0) - if err != nil { - printChainMetadata(db) - return nil, fmt.Errorf("failed to retrieve genesis from ancient %v", err) - } else if !bytes.Equal(kvgenesis, frgenesis) { - printChainMetadata(db) - return nil, fmt.Errorf("genesis mismatch: %#x (leveldb) != %#x (ancients)", kvgenesis, frgenesis) - } + return nil, fmt.Errorf("failed to retrieve genesis from ancient %v", err) + } else if !bytes.Equal(kvgenesis, frgenesis) { + printChainMetadata(db) + return nil, fmt.Errorf("genesis mismatch: %#x (leveldb) != %#x (ancients)", kvgenesis, frgenesis) } // Key-value store and freezer belong to the same network. Ensure that they // are contiguous, otherwise we might end up with a non-functional freezer. From ef5015d498d06e7fd4c961dd638e0799a34706b8 Mon Sep 17 00:00:00 2001 From: Sina Mahmoodi Date: Tue, 18 Mar 2025 16:36:02 +0100 Subject: [PATCH 05/22] fix --- cmd/geth/chaincmd.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index e915e0e6cbcf..a3e51796d0d3 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -665,7 +665,7 @@ func pruneHistory(ctx *cli.Context) error { // TODO(s1na): what if there is a crash between the two prune operations? // Truncate everything up to merge block - if _, err := chaindb.TruncateTailBlocks(mergeBlock); err != nil { + if _, err := chaindb.TruncateTail(mergeBlock); err != nil { return fmt.Errorf("failed to truncate ancient data: %v", err) } From 55a4a392cbe1820a0e78cb2e226c682046025e5b Mon Sep 17 00:00:00 2001 From: Sina Mahmoodi Date: Fri, 14 Mar 2025 16:07:25 +0100 Subject: [PATCH 06/22] construct genesis block from header --- core/blockchain.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/blockchain.go b/core/blockchain.go index 2bf7fba427f9..d80236c90229 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -332,7 +332,8 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, genesis *Genesis bc.prefetcher = newStatePrefetcher(chainConfig, bc.hc) bc.processor = NewStateProcessor(chainConfig, bc.hc) - bc.genesisBlock = bc.GetBlockByNumber(0) + genesisHeader := bc.GetHeaderByNumber(0) + bc.genesisBlock = types.NewBlockWithHeader(genesisHeader) if bc.genesisBlock == nil { return nil, ErrNoGenesis } From 2e470f186510479ef8311a1da46ac9d1a6bce985 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 18 Mar 2025 16:56:09 +0100 Subject: [PATCH 07/22] core/rawdb: add PruneTransactionIndex --- core/rawdb/chain_iterator.go | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/core/rawdb/chain_iterator.go b/core/rawdb/chain_iterator.go index 759e5913d13f..dd9d6a9dfb6a 100644 --- a/core/rawdb/chain_iterator.go +++ b/core/rawdb/chain_iterator.go @@ -17,6 +17,8 @@ package rawdb import ( + "encoding/binary" + "fmt" "runtime" "sync/atomic" "time" @@ -361,3 +363,28 @@ func UnindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt ch func unindexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) { unindexTransactions(db, from, to, interrupt, hook, false) } + +// PruneTransactionIndex removes all tx index entries below a certain block number. +func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) { + index := NewTable(db, string(txLookupPrefix)) + iter := index.NewIterator(nil, nil) + defer iter.Release() + + for iter.Next() { + v := iter.Value() + if len(v) > 8 { + continue // legacy entry + } + bn := decodeNumber(v) + if bn < pruneBlock { + fmt.Println("found entry", iter.Key()) + // db.Delete(iter.Key()) + } + } +} + +func decodeNumber(b []byte) uint64 { + var numBuffer [8]byte + copy(numBuffer[8-len(b):], b) + return binary.BigEndian.Uint64(numBuffer[:]) +} From 403e64ce75b9395c5c37ac9ea1e416cbfb8781cc Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 18 Mar 2025 16:57:12 +0100 Subject: [PATCH 08/22] cmd/geth: use PruneTransactionIndex --- cmd/geth/chaincmd.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index a3e51796d0d3..9c5e532f216b 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -647,8 +647,6 @@ func pruneHistory(ctx *cli.Context) error { return fmt.Errorf("merge block hash mismatch: got %s, want %s", hash.Hex(), mergeBlockHash) } - txlookupTail := rawdb.ReadTxIndexTail(chaindb) - log.Info("Starting chain pruning", "currentHeight", currentHeader.Number, "mergeBlock", mergeBlock, @@ -656,11 +654,7 @@ func pruneHistory(ctx *cli.Context) error { start := time.Now() - // First prune the transaction lookup index as - // it requires the block bodies. - if txlookupTail != nil && *txlookupTail < mergeBlock { - rawdb.UnindexTransactions(chaindb, *txlookupTail, mergeBlock, nil, false) - } + rawdb.PruneTransactionIndex(chaindb, mergeBlock) // TODO(s1na): what if there is a crash between the two prune operations? From a223ee6c27e8cd2a40322a8d00132f983f24a405 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 18 Mar 2025 17:24:37 +0100 Subject: [PATCH 09/22] core/rawdb: log error for invalid txindex entry --- core/rawdb/chain_iterator.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/rawdb/chain_iterator.go b/core/rawdb/chain_iterator.go index dd9d6a9dfb6a..d17b3e7ddf28 100644 --- a/core/rawdb/chain_iterator.go +++ b/core/rawdb/chain_iterator.go @@ -373,7 +373,8 @@ func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) { for iter.Next() { v := iter.Value() if len(v) > 8 { - continue // legacy entry + log.Error("Skipping prune legacy tx index entry", "hash", fmt.Sprintf("%x", iter.Key())) + continue } bn := decodeNumber(v) if bn < pruneBlock { From 45c8fe9f284dadc811fc6e0618be3c991130ab39 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 18 Mar 2025 17:59:08 +0100 Subject: [PATCH 10/22] core/rawdb: add test for PruneTransactionIndex and fix bug --- core/rawdb/chain_iterator.go | 7 ++- core/rawdb/chain_iterator_test.go | 84 +++++++++++++++++++++++-------- 2 files changed, 67 insertions(+), 24 deletions(-) diff --git a/core/rawdb/chain_iterator.go b/core/rawdb/chain_iterator.go index d17b3e7ddf28..b940195eb02c 100644 --- a/core/rawdb/chain_iterator.go +++ b/core/rawdb/chain_iterator.go @@ -366,8 +366,8 @@ func unindexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, in // PruneTransactionIndex removes all tx index entries below a certain block number. func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) { - index := NewTable(db, string(txLookupPrefix)) - iter := index.NewIterator(nil, nil) + db = NewTable(db, string(txLookupPrefix)) + iter := db.NewIterator(nil, nil) defer iter.Release() for iter.Next() { @@ -378,8 +378,7 @@ func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) { } bn := decodeNumber(v) if bn < pruneBlock { - fmt.Println("found entry", iter.Key()) - // db.Delete(iter.Key()) + db.Delete(iter.Key()) } } } diff --git a/core/rawdb/chain_iterator_test.go b/core/rawdb/chain_iterator_test.go index 390424f673fc..c711a1ca5426 100644 --- a/core/rawdb/chain_iterator_test.go +++ b/core/rawdb/chain_iterator_test.go @@ -25,6 +25,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/ethdb" ) func TestChainIterator(t *testing.T) { @@ -102,19 +103,18 @@ func TestChainIterator(t *testing.T) { } } -func TestIndexTransactions(t *testing.T) { - // Construct test chain db - chainDb := NewMemoryDatabase() - - var block *types.Block +func initDatabaseWithTransactions(db ethdb.Database) ([]*types.Block, []*types.Transaction) { + var blocks []*types.Block var txs []*types.Transaction to := common.BytesToAddress([]byte{0x11}) // Write empty genesis block - block = types.NewBlock(&types.Header{Number: big.NewInt(int64(0))}, nil, nil, newTestHasher()) - WriteBlock(chainDb, block) - WriteCanonicalHash(chainDb, block.Hash(), block.NumberU64()) + block := types.NewBlock(&types.Header{Number: big.NewInt(int64(0))}, nil, nil, newTestHasher()) + WriteBlock(db, block) + WriteCanonicalHash(db, block.Hash(), block.NumberU64()) + blocks = append(blocks, block) + // Create transactions. for i := uint64(1); i <= 10; i++ { var tx *types.Transaction if i%2 == 0 { @@ -138,10 +138,21 @@ func TestIndexTransactions(t *testing.T) { }) } txs = append(txs, tx) - block = types.NewBlock(&types.Header{Number: big.NewInt(int64(i))}, &types.Body{Transactions: types.Transactions{tx}}, nil, newTestHasher()) - WriteBlock(chainDb, block) - WriteCanonicalHash(chainDb, block.Hash(), block.NumberU64()) + block := types.NewBlock(&types.Header{Number: big.NewInt(int64(i))}, &types.Body{Transactions: types.Transactions{tx}}, nil, newTestHasher()) + WriteBlock(db, block) + WriteCanonicalHash(db, block.Hash(), block.NumberU64()) + blocks = append(blocks, block) } + + return blocks, txs +} + +func TestIndexTransactions(t *testing.T) { + // Construct test chain db + chainDB := NewMemoryDatabase() + + _, txs := initDatabaseWithTransactions(chainDB) + // verify checks whether the tx indices in the range [from, to) // is expected. verify := func(from, to int, exist bool, tail uint64) { @@ -149,7 +160,7 @@ func TestIndexTransactions(t *testing.T) { if i == 0 { continue } - number := ReadTxLookupEntry(chainDb, txs[i-1].Hash()) + number := ReadTxLookupEntry(chainDB, txs[i-1].Hash()) if exist && number == nil { t.Fatalf("Transaction index %d missing", i) } @@ -157,29 +168,29 @@ func TestIndexTransactions(t *testing.T) { t.Fatalf("Transaction index %d is not deleted", i) } } - number := ReadTxIndexTail(chainDb) + number := ReadTxIndexTail(chainDB) if number == nil || *number != tail { t.Fatalf("Transaction tail mismatch") } } - IndexTransactions(chainDb, 5, 11, nil, false) + IndexTransactions(chainDB, 5, 11, nil, false) verify(5, 11, true, 5) verify(0, 5, false, 5) - IndexTransactions(chainDb, 0, 5, nil, false) + IndexTransactions(chainDB, 0, 5, nil, false) verify(0, 11, true, 0) - UnindexTransactions(chainDb, 0, 5, nil, false) + UnindexTransactions(chainDB, 0, 5, nil, false) verify(5, 11, true, 5) verify(0, 5, false, 5) - UnindexTransactions(chainDb, 5, 11, nil, false) + UnindexTransactions(chainDB, 5, 11, nil, false) verify(0, 11, false, 11) // Testing corner cases signal := make(chan struct{}) var once sync.Once - indexTransactionsForTesting(chainDb, 5, 11, signal, func(n uint64) bool { + indexTransactionsForTesting(chainDB, 5, 11, signal, func(n uint64) bool { if n <= 8 { once.Do(func() { close(signal) @@ -190,11 +201,11 @@ func TestIndexTransactions(t *testing.T) { }) verify(9, 11, true, 9) verify(0, 9, false, 9) - IndexTransactions(chainDb, 0, 9, nil, false) + IndexTransactions(chainDB, 0, 9, nil, false) signal = make(chan struct{}) var once2 sync.Once - unindexTransactionsForTesting(chainDb, 0, 11, signal, func(n uint64) bool { + unindexTransactionsForTesting(chainDB, 0, 11, signal, func(n uint64) bool { if n >= 8 { once2.Do(func() { close(signal) @@ -206,3 +217,36 @@ func TestIndexTransactions(t *testing.T) { verify(8, 11, true, 8) verify(0, 8, false, 8) } + +func TestPruneTransactionIndex(t *testing.T) { + chainDB := NewMemoryDatabase() + blocks, _ := initDatabaseWithTransactions(chainDB) + lastBlock := blocks[len(blocks)-1].NumberU64() + pruneBlock := lastBlock - 3 + + IndexTransactions(chainDB, 0, lastBlock+1, nil, false) + + // Check all transactions are in index. + for _, block := range blocks { + for _, tx := range block.Transactions() { + num := ReadTxLookupEntry(chainDB, tx.Hash()) + if num == nil || *num != block.NumberU64() { + t.Fatalf("wrong TxLookup entry: %x -> %v", tx.Hash(), num) + } + } + } + + PruneTransactionIndex(chainDB, pruneBlock) + + // Check transactions from old blocks not included. + for _, block := range blocks { + for _, tx := range block.Transactions() { + num := ReadTxLookupEntry(chainDB, tx.Hash()) + if block.NumberU64() < pruneBlock { + if num != nil { + t.Fatalf("TxLookup entry not removed: %x -> %v", tx.Hash(), num) + } + } + } + } +} From ba740954c286720898fb72400ca2a5ab48d32ae3 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 18 Mar 2025 18:01:08 +0100 Subject: [PATCH 11/22] core/rawdb: add delete error handling --- core/rawdb/chain_iterator.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/rawdb/chain_iterator.go b/core/rawdb/chain_iterator.go index b940195eb02c..9c62a63b66a6 100644 --- a/core/rawdb/chain_iterator.go +++ b/core/rawdb/chain_iterator.go @@ -378,7 +378,9 @@ func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) { } bn := decodeNumber(v) if bn < pruneBlock { - db.Delete(iter.Key()) + if err := db.Delete(iter.Key()); err != nil { + log.Crit("Error deleting tx lookup entry", "hash", fmt.Sprintf("%x", iter.Key())) + } } } } From 2d29bcfae128e285ec6a63d27d0d636f2c9a3e0b Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 18 Mar 2025 18:05:43 +0100 Subject: [PATCH 12/22] core/rawdb: simplify condition in test --- core/rawdb/chain_iterator_test.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/core/rawdb/chain_iterator_test.go b/core/rawdb/chain_iterator_test.go index c711a1ca5426..cb6119baaa19 100644 --- a/core/rawdb/chain_iterator_test.go +++ b/core/rawdb/chain_iterator_test.go @@ -242,10 +242,8 @@ func TestPruneTransactionIndex(t *testing.T) { for _, block := range blocks { for _, tx := range block.Transactions() { num := ReadTxLookupEntry(chainDB, tx.Hash()) - if block.NumberU64() < pruneBlock { - if num != nil { - t.Fatalf("TxLookup entry not removed: %x -> %v", tx.Hash(), num) - } + if block.NumberU64() < pruneBlock && num != nil { + t.Fatalf("TxLookup entry not removed: %x -> %v", tx.Hash(), num) } } } From 1100dba9650d333a0b17ed3b17a60d7fe3bb03f0 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 18 Mar 2025 21:08:16 +0100 Subject: [PATCH 13/22] core/rawdb: improve PruneTransactionIndex --- core/rawdb/chain_iterator.go | 16 +++++++++++++--- core/rawdb/chain_iterator_test.go | 3 +++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/core/rawdb/chain_iterator.go b/core/rawdb/chain_iterator.go index 9c62a63b66a6..64d252f31be0 100644 --- a/core/rawdb/chain_iterator.go +++ b/core/rawdb/chain_iterator.go @@ -366,10 +366,19 @@ func unindexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, in // PruneTransactionIndex removes all tx index entries below a certain block number. func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) { - db = NewTable(db, string(txLookupPrefix)) - iter := db.NewIterator(nil, nil) - defer iter.Release() + tail := ReadTxIndexTail(db) + if tail == nil { + return // no index + } + if *tail > pruneBlock { + return // index ends above pruneBlock + } + // There are transactions below pruneBlock in the index. Iterate the entire index to + // remove the entries. Note if this fails, the index is messed up, but tail still + // points to the old tail. + iter := db.NewIterator(txLookupPrefix, nil) + defer iter.Release() for iter.Next() { v := iter.Value() if len(v) > 8 { @@ -383,6 +392,7 @@ func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) { } } } + WriteTxIndexTail(db, pruneBlock) } func decodeNumber(b []byte) uint64 { diff --git a/core/rawdb/chain_iterator_test.go b/core/rawdb/chain_iterator_test.go index cb6119baaa19..b6cc5388a4c2 100644 --- a/core/rawdb/chain_iterator_test.go +++ b/core/rawdb/chain_iterator_test.go @@ -245,6 +245,9 @@ func TestPruneTransactionIndex(t *testing.T) { if block.NumberU64() < pruneBlock && num != nil { t.Fatalf("TxLookup entry not removed: %x -> %v", tx.Hash(), num) } + if block.NumberU64() > pruneBlock && (num == nil || *num != block.NumberU64()) { + t.Fatalf("wrong TxLookup entry after pruning: %x -> %v", tx.Hash(), num) + } } } } From bbedddb7d523a9eec5d02e5425d245d9807e8791 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 18 Mar 2025 21:16:40 +0100 Subject: [PATCH 14/22] core/rawdb: log tx index pruning progress --- core/rawdb/chain_iterator.go | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/core/rawdb/chain_iterator.go b/core/rawdb/chain_iterator.go index 64d252f31be0..cc6350f242bc 100644 --- a/core/rawdb/chain_iterator.go +++ b/core/rawdb/chain_iterator.go @@ -377,9 +377,14 @@ func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) { // There are transactions below pruneBlock in the index. Iterate the entire index to // remove the entries. Note if this fails, the index is messed up, but tail still // points to the old tail. - iter := db.NewIterator(txLookupPrefix, nil) + var ( + iter = db.NewIterator(txLookupPrefix, nil) + count int + removed int + ) defer iter.Release() for iter.Next() { + count++ v := iter.Value() if len(v) > 8 { log.Error("Skipping prune legacy tx index entry", "hash", fmt.Sprintf("%x", iter.Key())) @@ -390,8 +395,16 @@ func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) { if err := db.Delete(iter.Key()); err != nil { log.Crit("Error deleting tx lookup entry", "hash", fmt.Sprintf("%x", iter.Key())) } + removed++ + } + if count%10000 == 0 { + log.Info("Pruning tx index", "count", count, "removed", removed) } } + if iter.Error() != nil { + log.Error("Tx index pruning iterator error", "err", iter.Error()) + } + WriteTxIndexTail(db, pruneBlock) } From 8ba23d5fb7d7f98166a6f7305f7852522fd9166c Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 18 Mar 2025 21:17:46 +0100 Subject: [PATCH 15/22] core/rawdb: less logging during tx index pruning --- core/rawdb/chain_iterator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/rawdb/chain_iterator.go b/core/rawdb/chain_iterator.go index cc6350f242bc..291c54dff7d0 100644 --- a/core/rawdb/chain_iterator.go +++ b/core/rawdb/chain_iterator.go @@ -397,7 +397,7 @@ func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) { } removed++ } - if count%10000 == 0 { + if count%1000000 == 0 { log.Info("Pruning tx index", "count", count, "removed", removed) } } From b14765822bbe6f162f7acc1e47b9a08e944cba20 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 18 Mar 2025 21:25:50 +0100 Subject: [PATCH 16/22] core/rawdb: further reduce logging --- core/rawdb/chain_iterator.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/rawdb/chain_iterator.go b/core/rawdb/chain_iterator.go index 291c54dff7d0..89bd2c6421ed 100644 --- a/core/rawdb/chain_iterator.go +++ b/core/rawdb/chain_iterator.go @@ -397,7 +397,7 @@ func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) { } removed++ } - if count%1000000 == 0 { + if count%10000000 == 0 { log.Info("Pruning tx index", "count", count, "removed", removed) } } From 94b7b6994e61552746446fdc248d30c1ea2060c6 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 18 Mar 2025 21:26:04 +0100 Subject: [PATCH 17/22] cmd/geth: clean up pruning command --- cmd/geth/chaincmd.go | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index 9c5e532f216b..05c8bc4c7c3d 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -647,25 +647,15 @@ func pruneHistory(ctx *cli.Context) error { return fmt.Errorf("merge block hash mismatch: got %s, want %s", hash.Hex(), mergeBlockHash) } - log.Info("Starting chain pruning", - "currentHeight", currentHeader.Number, - "mergeBlock", mergeBlock, - "mergeBlockHash", mergeBlockHash) - + log.Info("Starting history pruning", "head", currentHeader.Number, "tail", mergeBlock, "tailHash", mergeBlockHash) start := time.Now() - rawdb.PruneTransactionIndex(chaindb, mergeBlock) - - // TODO(s1na): what if there is a crash between the two prune operations? - - // Truncate everything up to merge block if _, err := chaindb.TruncateTail(mergeBlock); err != nil { return fmt.Errorf("failed to truncate ancient data: %v", err) } + log.Info("History pruning completed", "tail", mergeBlock, "elapsed", common.PrettyDuration(time.Since(start))) - log.Info("Chain pruning completed", - "prunedUpTo", mergeBlock, - "elapsed", common.PrettyDuration(time.Since(start))) + // TODO(s1na): what if there is a crash between the two prune operations? return nil } From bcd46db0532164504245c0e0c4c85f18d0b4a3d4 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 18 Mar 2025 21:44:09 +0100 Subject: [PATCH 18/22] core/rawdb: improve comment --- core/rawdb/chain_iterator.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/rawdb/chain_iterator.go b/core/rawdb/chain_iterator.go index 89bd2c6421ed..e329598d82d1 100644 --- a/core/rawdb/chain_iterator.go +++ b/core/rawdb/chain_iterator.go @@ -374,9 +374,9 @@ func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) { return // index ends above pruneBlock } - // There are transactions below pruneBlock in the index. Iterate the entire index to - // remove the entries. Note if this fails, the index is messed up, but tail still - // points to the old tail. + // There are blocks below pruneBlock in the index. Iterate the entire index to remove + // their entries. Note if this fails, the index is messed up, but tail still points to + // the old tail. var ( iter = db.NewIterator(txLookupPrefix, nil) count int From a20cdbc6f9ddd55ca482ac053698c3caf736f5c2 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Tue, 18 Mar 2025 21:57:59 +0100 Subject: [PATCH 19/22] core/rawdb: improve logs --- core/rawdb/chain_iterator.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/rawdb/chain_iterator.go b/core/rawdb/chain_iterator.go index e329598d82d1..85ebfbb234c8 100644 --- a/core/rawdb/chain_iterator.go +++ b/core/rawdb/chain_iterator.go @@ -387,13 +387,13 @@ func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) { count++ v := iter.Value() if len(v) > 8 { - log.Error("Skipping prune legacy tx index entry", "hash", fmt.Sprintf("%x", iter.Key())) + log.Error("Skipping legacy tx index entry", "hash", fmt.Sprintf("%x", iter.Key())) continue } bn := decodeNumber(v) if bn < pruneBlock { if err := db.Delete(iter.Key()); err != nil { - log.Crit("Error deleting tx lookup entry", "hash", fmt.Sprintf("%x", iter.Key())) + log.Crit("Error deleting tx index entry", "hash", fmt.Sprintf("%x", iter.Key())) } removed++ } From 36cb9a7d6dfb52e5ff047625ef7535e84447eaf0 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 19 Mar 2025 09:07:02 +0100 Subject: [PATCH 20/22] core/rawdb: fix comparison in test --- core/rawdb/chain_iterator_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/rawdb/chain_iterator_test.go b/core/rawdb/chain_iterator_test.go index b6cc5388a4c2..75bd5a9a942c 100644 --- a/core/rawdb/chain_iterator_test.go +++ b/core/rawdb/chain_iterator_test.go @@ -245,7 +245,7 @@ func TestPruneTransactionIndex(t *testing.T) { if block.NumberU64() < pruneBlock && num != nil { t.Fatalf("TxLookup entry not removed: %x -> %v", tx.Hash(), num) } - if block.NumberU64() > pruneBlock && (num == nil || *num != block.NumberU64()) { + if block.NumberU64() >= pruneBlock && (num == nil || *num != block.NumberU64()) { t.Fatalf("wrong TxLookup entry after pruning: %x -> %v", tx.Hash(), num) } } From 36e58796479290f7846422d1e1e477a630360b99 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Wed, 19 Mar 2025 09:09:08 +0100 Subject: [PATCH 21/22] core/rawdb: simplify condition --- core/rawdb/chain_iterator.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/core/rawdb/chain_iterator.go b/core/rawdb/chain_iterator.go index 85ebfbb234c8..3371669eab3f 100644 --- a/core/rawdb/chain_iterator.go +++ b/core/rawdb/chain_iterator.go @@ -367,11 +367,8 @@ func unindexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, in // PruneTransactionIndex removes all tx index entries below a certain block number. func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) { tail := ReadTxIndexTail(db) - if tail == nil { - return // no index - } - if *tail > pruneBlock { - return // index ends above pruneBlock + if tail == nil || *tail > pruneBlock { + return // no index, or index ends above pruneBlock } // There are blocks below pruneBlock in the index. Iterate the entire index to remove From 04d8208d43e7db89337d3b5758d81416b35967aa Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Fri, 21 Mar 2025 11:43:02 +0100 Subject: [PATCH 22/22] core/rawdb: use DeleteAllTxLookupEntries in PruneTransactionIndex --- core/rawdb/accessors_indexes.go | 5 +++-- core/rawdb/chain_iterator.go | 33 ++++++++++----------------------- core/txindexer.go | 3 ++- 3 files changed, 15 insertions(+), 26 deletions(-) diff --git a/core/rawdb/accessors_indexes.go b/core/rawdb/accessors_indexes.go index 342aedd8dc3b..7bb96b1fa186 100644 --- a/core/rawdb/accessors_indexes.go +++ b/core/rawdb/accessors_indexes.go @@ -103,13 +103,14 @@ func DeleteTxLookupEntries(db ethdb.KeyValueWriter, hashes []common.Hash) { // DeleteAllTxLookupEntries purges all the transaction indexes in the database. // If condition is specified, only the entry with condition as True will be // removed; If condition is not specified, the entry is deleted. -func DeleteAllTxLookupEntries(db ethdb.KeyValueStore, condition func([]byte) bool) { +func DeleteAllTxLookupEntries(db ethdb.KeyValueStore, condition func(common.Hash, []byte) bool) { iter := NewKeyLengthIterator(db.NewIterator(txLookupPrefix, nil), common.HashLength+len(txLookupPrefix)) defer iter.Release() batch := db.NewBatch() for iter.Next() { - if condition == nil || condition(iter.Value()) { + txhash := common.Hash(iter.Key()[1:]) + if condition == nil || condition(txhash, iter.Value()) { batch.Delete(iter.Key()) } if batch.ValueSize() >= ethdb.IdealBatchSize { diff --git a/core/rawdb/chain_iterator.go b/core/rawdb/chain_iterator.go index 3371669eab3f..ecbc44e1f1ee 100644 --- a/core/rawdb/chain_iterator.go +++ b/core/rawdb/chain_iterator.go @@ -18,7 +18,6 @@ package rawdb import ( "encoding/binary" - "fmt" "runtime" "sync/atomic" "time" @@ -370,38 +369,26 @@ func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) { if tail == nil || *tail > pruneBlock { return // no index, or index ends above pruneBlock } - // There are blocks below pruneBlock in the index. Iterate the entire index to remove // their entries. Note if this fails, the index is messed up, but tail still points to // the old tail. - var ( - iter = db.NewIterator(txLookupPrefix, nil) - count int - removed int - ) - defer iter.Release() - for iter.Next() { + var count, removed int + DeleteAllTxLookupEntries(db, func(txhash common.Hash, v []byte) bool { count++ - v := iter.Value() + if count%10000000 == 0 { + log.Info("Pruning tx index", "count", count, "removed", removed) + } if len(v) > 8 { - log.Error("Skipping legacy tx index entry", "hash", fmt.Sprintf("%x", iter.Key())) - continue + log.Error("Skipping legacy tx index entry", "hash", txhash) + return false } bn := decodeNumber(v) if bn < pruneBlock { - if err := db.Delete(iter.Key()); err != nil { - log.Crit("Error deleting tx index entry", "hash", fmt.Sprintf("%x", iter.Key())) - } removed++ + return true } - if count%10000000 == 0 { - log.Info("Pruning tx index", "count", count, "removed", removed) - } - } - if iter.Error() != nil { - log.Error("Tx index pruning iterator error", "err", iter.Error()) - } - + return false + }) WriteTxIndexTail(db, pruneBlock) } diff --git a/core/txindexer.go b/core/txindexer.go index 29e87905d509..d0fce302f3eb 100644 --- a/core/txindexer.go +++ b/core/txindexer.go @@ -20,6 +20,7 @@ import ( "errors" "fmt" + "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" @@ -186,7 +187,7 @@ func (indexer *txIndexer) repair(head uint64) { // potentially leaving dangling indexes in the database. // However, this is considered acceptable. rawdb.WriteTxIndexTail(indexer.db, indexer.cutoff) - rawdb.DeleteAllTxLookupEntries(indexer.db, func(blob []byte) bool { + rawdb.DeleteAllTxLookupEntries(indexer.db, func(txhash common.Hash, blob []byte) bool { n := rawdb.DecodeTxLookupEntry(blob, indexer.db) return n != nil && *n < indexer.cutoff })