Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions cmd/geth/chaincmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
"github.com/ethereum/go-ethereum/core/state"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/eth/ethconfig"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/internal/era"
"github.com/ethereum/go-ethereum/log"
Expand Down Expand Up @@ -189,6 +190,18 @@ It's deprecated, please use "geth db import" instead.
This command dumps out the state for a given block (or latest, if none provided).
`,
}

pruneCommand = &cli.Command{
Action: pruneHistory,
Name: "prune-history",
Usage: "Prune blockchain history (block bodies and receipts) up to the merge block",
ArgsUsage: "",
Flags: utils.DatabaseFlags,
Description: `
The prune-history command removes historical block bodies and receipts from the
blockchain database up to the merge block, while preserving block headers. This
helps reduce storage requirements for nodes that don't need full historical data.`,
}
)

// initGenesis will initialise the given JSON format genesis file and writes it as
Expand Down Expand Up @@ -598,3 +611,51 @@ func hashish(x string) bool {
_, err := strconv.Atoi(x)
return err != nil
}

func pruneHistory(ctx *cli.Context) error {
stack, _ := makeConfigNode(ctx)
defer stack.Close()

// Open the chain database
chain, chaindb := utils.MakeChain(ctx, stack, false)
defer chaindb.Close()
defer chain.Stop()

// Determine the prune point. This will be the first PoS block.
prunePoint, ok := ethconfig.HistoryPrunePoints[chain.Genesis().Hash()]
if !ok || prunePoint == nil {
return errors.New("prune point not found")
}
var (
mergeBlock = prunePoint.BlockNumber
mergeBlockHash = prunePoint.BlockHash.Hex()
)

// Check we're far enough past merge to ensure all data is in freezer
currentHeader := chain.CurrentHeader()
if currentHeader == nil {
return errors.New("current header not found")
}
if currentHeader.Number.Uint64() < mergeBlock+params.FullImmutabilityThreshold {
return fmt.Errorf("chain not far enough past merge block, need %d more blocks",
mergeBlock+params.FullImmutabilityThreshold-currentHeader.Number.Uint64())
}

// Double-check the prune block in db has the expected hash.
hash := rawdb.ReadCanonicalHash(chaindb, mergeBlock)
if hash != common.HexToHash(mergeBlockHash) {
return fmt.Errorf("merge block hash mismatch: got %s, want %s", hash.Hex(), mergeBlockHash)
}

log.Info("Starting history pruning", "head", currentHeader.Number, "tail", mergeBlock, "tailHash", mergeBlockHash)
start := time.Now()
rawdb.PruneTransactionIndex(chaindb, mergeBlock)
if _, err := chaindb.TruncateTail(mergeBlock); err != nil {
return fmt.Errorf("failed to truncate ancient data: %v", err)
}
log.Info("History pruning completed", "tail", mergeBlock, "elapsed", common.PrettyDuration(time.Since(start)))

// TODO(s1na): what if there is a crash between the two prune operations?

return nil
}
1 change: 1 addition & 0 deletions cmd/geth/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ func init() {
removedbCommand,
dumpCommand,
dumpGenesisCommand,
pruneCommand,
// See accountcmd.go:
accountCommand,
walletCommand,
Expand Down
3 changes: 2 additions & 1 deletion core/blockchain.go
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,8 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, genesis *Genesis
bc.prefetcher = newStatePrefetcher(chainConfig, bc.hc)
bc.processor = NewStateProcessor(chainConfig, bc.hc)

bc.genesisBlock = bc.GetBlockByNumber(0)
genesisHeader := bc.GetHeaderByNumber(0)
bc.genesisBlock = types.NewBlockWithHeader(genesisHeader)
if bc.genesisBlock == nil {
return nil, ErrNoGenesis
}
Expand Down
5 changes: 3 additions & 2 deletions core/rawdb/accessors_indexes.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,14 @@ func DeleteTxLookupEntries(db ethdb.KeyValueWriter, hashes []common.Hash) {
// DeleteAllTxLookupEntries purges all the transaction indexes in the database.
// If condition is specified, only the entry with condition as True will be
// removed; If condition is not specified, the entry is deleted.
func DeleteAllTxLookupEntries(db ethdb.KeyValueStore, condition func([]byte) bool) {
func DeleteAllTxLookupEntries(db ethdb.KeyValueStore, condition func(common.Hash, []byte) bool) {
iter := NewKeyLengthIterator(db.NewIterator(txLookupPrefix, nil), common.HashLength+len(txLookupPrefix))
defer iter.Release()

batch := db.NewBatch()
for iter.Next() {
if condition == nil || condition(iter.Value()) {
txhash := common.Hash(iter.Key()[1:])
if condition == nil || condition(txhash, iter.Value()) {
batch.Delete(iter.Key())
}
if batch.ValueSize() >= ethdb.IdealBatchSize {
Expand Down
36 changes: 36 additions & 0 deletions core/rawdb/chain_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package rawdb

import (
"encoding/binary"
"runtime"
"sync/atomic"
"time"
Expand Down Expand Up @@ -361,3 +362,38 @@ func UnindexTransactions(db ethdb.Database, from uint64, to uint64, interrupt ch
func unindexTransactionsForTesting(db ethdb.Database, from uint64, to uint64, interrupt chan struct{}, hook func(uint64) bool) {
unindexTransactions(db, from, to, interrupt, hook, false)
}

// PruneTransactionIndex removes all tx index entries below a certain block number.
func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any particular reason to unindex the transactions by traversing the entire tx lookups in db? Why not reuse the UnindexTransaction with block range?

e.g.

// PruneTransactionIndex removes all tx index entries below a certain block number.
func PruneTransactionIndex(db ethdb.Database, pruneBlock uint64) {
	tail := ReadTxIndexTail(db)
	if tail == nil || *tail > pruneBlock {
		return // no index, or index ends above pruneBlock
	}
	UnindexTransactions(db, *tail, pruneBlock, nil, true)
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We decided to implement it like this because unindexing everything takes 45 minutes, while this scan takes only 1.5min (on Sepolia).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh really, interesting

tail := ReadTxIndexTail(db)
if tail == nil || *tail > pruneBlock {
return // no index, or index ends above pruneBlock
}
// There are blocks below pruneBlock in the index. Iterate the entire index to remove
// their entries. Note if this fails, the index is messed up, but tail still points to
// the old tail.
var count, removed int
DeleteAllTxLookupEntries(db, func(txhash common.Hash, v []byte) bool {
count++
if count%10000000 == 0 {
log.Info("Pruning tx index", "count", count, "removed", removed)
}
if len(v) > 8 {
log.Error("Skipping legacy tx index entry", "hash", txhash)
return false
}
bn := decodeNumber(v)
if bn < pruneBlock {
removed++
return true
}
return false
})
WriteTxIndexTail(db, pruneBlock)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's dangerous to delete the transaction indexes first and then update the tail.
As it can lead to a weird situation that corresponding transaction index is not found while the tail indicates it should exist.

Anyway, I think we can reuse the one here https://github.com/s1na/go-ethereum/blob/txlookup-cutoff/core/rawdb/accessors_indexes.go#L106

}

func decodeNumber(b []byte) uint64 {
var numBuffer [8]byte
copy(numBuffer[8-len(b):], b)
return binary.BigEndian.Uint64(numBuffer[:])
}
85 changes: 65 additions & 20 deletions core/rawdb/chain_iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/ethdb"
)

func TestChainIterator(t *testing.T) {
Expand Down Expand Up @@ -102,19 +103,18 @@ func TestChainIterator(t *testing.T) {
}
}

func TestIndexTransactions(t *testing.T) {
// Construct test chain db
chainDb := NewMemoryDatabase()

var block *types.Block
func initDatabaseWithTransactions(db ethdb.Database) ([]*types.Block, []*types.Transaction) {
var blocks []*types.Block
var txs []*types.Transaction
to := common.BytesToAddress([]byte{0x11})

// Write empty genesis block
block = types.NewBlock(&types.Header{Number: big.NewInt(int64(0))}, nil, nil, newTestHasher())
WriteBlock(chainDb, block)
WriteCanonicalHash(chainDb, block.Hash(), block.NumberU64())
block := types.NewBlock(&types.Header{Number: big.NewInt(int64(0))}, nil, nil, newTestHasher())
WriteBlock(db, block)
WriteCanonicalHash(db, block.Hash(), block.NumberU64())
blocks = append(blocks, block)

// Create transactions.
for i := uint64(1); i <= 10; i++ {
var tx *types.Transaction
if i%2 == 0 {
Expand All @@ -138,48 +138,59 @@ func TestIndexTransactions(t *testing.T) {
})
}
txs = append(txs, tx)
block = types.NewBlock(&types.Header{Number: big.NewInt(int64(i))}, &types.Body{Transactions: types.Transactions{tx}}, nil, newTestHasher())
WriteBlock(chainDb, block)
WriteCanonicalHash(chainDb, block.Hash(), block.NumberU64())
block := types.NewBlock(&types.Header{Number: big.NewInt(int64(i))}, &types.Body{Transactions: types.Transactions{tx}}, nil, newTestHasher())
WriteBlock(db, block)
WriteCanonicalHash(db, block.Hash(), block.NumberU64())
blocks = append(blocks, block)
}

return blocks, txs
}

func TestIndexTransactions(t *testing.T) {
// Construct test chain db
chainDB := NewMemoryDatabase()

_, txs := initDatabaseWithTransactions(chainDB)

// verify checks whether the tx indices in the range [from, to)
// is expected.
verify := func(from, to int, exist bool, tail uint64) {
for i := from; i < to; i++ {
if i == 0 {
continue
}
number := ReadTxLookupEntry(chainDb, txs[i-1].Hash())
number := ReadTxLookupEntry(chainDB, txs[i-1].Hash())
if exist && number == nil {
t.Fatalf("Transaction index %d missing", i)
}
if !exist && number != nil {
t.Fatalf("Transaction index %d is not deleted", i)
}
}
number := ReadTxIndexTail(chainDb)
number := ReadTxIndexTail(chainDB)
if number == nil || *number != tail {
t.Fatalf("Transaction tail mismatch")
}
}
IndexTransactions(chainDb, 5, 11, nil, false)
IndexTransactions(chainDB, 5, 11, nil, false)
verify(5, 11, true, 5)
verify(0, 5, false, 5)

IndexTransactions(chainDb, 0, 5, nil, false)
IndexTransactions(chainDB, 0, 5, nil, false)
verify(0, 11, true, 0)

UnindexTransactions(chainDb, 0, 5, nil, false)
UnindexTransactions(chainDB, 0, 5, nil, false)
verify(5, 11, true, 5)
verify(0, 5, false, 5)

UnindexTransactions(chainDb, 5, 11, nil, false)
UnindexTransactions(chainDB, 5, 11, nil, false)
verify(0, 11, false, 11)

// Testing corner cases
signal := make(chan struct{})
var once sync.Once
indexTransactionsForTesting(chainDb, 5, 11, signal, func(n uint64) bool {
indexTransactionsForTesting(chainDB, 5, 11, signal, func(n uint64) bool {
if n <= 8 {
once.Do(func() {
close(signal)
Expand All @@ -190,11 +201,11 @@ func TestIndexTransactions(t *testing.T) {
})
verify(9, 11, true, 9)
verify(0, 9, false, 9)
IndexTransactions(chainDb, 0, 9, nil, false)
IndexTransactions(chainDB, 0, 9, nil, false)

signal = make(chan struct{})
var once2 sync.Once
unindexTransactionsForTesting(chainDb, 0, 11, signal, func(n uint64) bool {
unindexTransactionsForTesting(chainDB, 0, 11, signal, func(n uint64) bool {
if n >= 8 {
once2.Do(func() {
close(signal)
Expand All @@ -206,3 +217,37 @@ func TestIndexTransactions(t *testing.T) {
verify(8, 11, true, 8)
verify(0, 8, false, 8)
}

func TestPruneTransactionIndex(t *testing.T) {
chainDB := NewMemoryDatabase()
blocks, _ := initDatabaseWithTransactions(chainDB)
lastBlock := blocks[len(blocks)-1].NumberU64()
pruneBlock := lastBlock - 3

IndexTransactions(chainDB, 0, lastBlock+1, nil, false)

// Check all transactions are in index.
for _, block := range blocks {
for _, tx := range block.Transactions() {
num := ReadTxLookupEntry(chainDB, tx.Hash())
if num == nil || *num != block.NumberU64() {
t.Fatalf("wrong TxLookup entry: %x -> %v", tx.Hash(), num)
}
}
}

PruneTransactionIndex(chainDB, pruneBlock)

// Check transactions from old blocks not included.
for _, block := range blocks {
for _, tx := range block.Transactions() {
num := ReadTxLookupEntry(chainDB, tx.Hash())
if block.NumberU64() < pruneBlock && num != nil {
t.Fatalf("TxLookup entry not removed: %x -> %v", tx.Hash(), num)
}
if block.NumberU64() >= pruneBlock && (num == nil || *num != block.NumberU64()) {
t.Fatalf("wrong TxLookup entry after pruning: %x -> %v", tx.Hash(), num)
}
}
}
}
3 changes: 2 additions & 1 deletion core/txindexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"errors"
"fmt"

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
Expand Down Expand Up @@ -186,7 +187,7 @@ func (indexer *txIndexer) repair(head uint64) {
// potentially leaving dangling indexes in the database.
// However, this is considered acceptable.
rawdb.WriteTxIndexTail(indexer.db, indexer.cutoff)
rawdb.DeleteAllTxLookupEntries(indexer.db, func(blob []byte) bool {
rawdb.DeleteAllTxLookupEntries(indexer.db, func(txhash common.Hash, blob []byte) bool {
n := rawdb.DecodeTxLookupEntry(blob, indexer.db)
return n != nil && *n < indexer.cutoff
})
Expand Down
2 changes: 1 addition & 1 deletion tests/testdata