-
Notifications
You must be signed in to change notification settings - Fork 21.6k
cmd, core/state: implement state pruner #21042
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
26 commits
Select commit
Hold shift + click to select a range
9107ee4
cmd, core, tests: initial state pruner
rjl493456442 ba8efc6
core: fix db inspector
rjl493456442 1467930
cmd/geth: add verify-state
rjl493456442 6d3f487
cmd/geth: add verification tool
rjl493456442 4d11dcd
core/rawdb: implement flatdb
rjl493456442 732d171
cmd, core: fix rebase
rjl493456442 e904d90
core/state: use new contract code layout
rjl493456442 8c0f30f
core/state/pruner: avoid deleting genesis state
rjl493456442 0b97ce3
cmd/geth: add helper function
rjl493456442 6d7e028
core, cmd: fix extract genesis
rjl493456442 1f08997
core: minor fixes
rjl493456442 5747c8f
all: update sum
rjl493456442 80f1576
contracts: remove useless
rjl493456442 394312f
core/state/snapshot: plugin stacktrie
rjl493456442 e29bba8
core: polish
rjl493456442 cdc2476
core/state/snapshot: iterate storage concurrently
rjl493456442 226fa8f
core/state/snapshot: fix iteration
rjl493456442 5eff18c
core: add comments
rjl493456442 d86e334
core/state/snapshot: polish code
rjl493456442 c1caa9a
core/state: polish
rjl493456442 3fe484c
core/state/snapshot: rebase
rjl493456442 ba02664
core/rawdb: add comments
rjl493456442 04c76bb
core/rawdb: fix tests
rjl493456442 8fe9b9f
core/rawdb: improve tests
rjl493456442 b6f8f89
core/state/snapshot: fix concurrent iteration
rjl493456442 82e8046
core/state: run pruning during the recovery
rjl493456442 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,369 @@ | ||
| // Copyright 2020 The go-ethereum Authors | ||
| // This file is part of go-ethereum. | ||
| // | ||
| // go-ethereum is free software: you can redistribute it and/or modify | ||
| // it under the terms of the GNU General Public License as published by | ||
| // the Free Software Foundation, either version 3 of the License, or | ||
| // (at your option) any later version. | ||
| // | ||
| // go-ethereum is distributed in the hope that it will be useful, | ||
| // but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| // GNU General Public License for more details. | ||
| // | ||
| // You should have received a copy of the GNU General Public License | ||
| // along with go-ethereum. If not, see <http://www.gnu.org/licenses/>. | ||
|
|
||
| package main | ||
|
|
||
| import ( | ||
| "bytes" | ||
| "fmt" | ||
| "math/big" | ||
| "os" | ||
| "time" | ||
|
|
||
| "github.com/ethereum/go-ethereum/cmd/utils" | ||
| "github.com/ethereum/go-ethereum/common" | ||
| "github.com/ethereum/go-ethereum/core/rawdb" | ||
| "github.com/ethereum/go-ethereum/core/state/pruner" | ||
| "github.com/ethereum/go-ethereum/core/state/snapshot" | ||
| "github.com/ethereum/go-ethereum/crypto" | ||
| "github.com/ethereum/go-ethereum/log" | ||
| "github.com/ethereum/go-ethereum/rlp" | ||
| "github.com/ethereum/go-ethereum/trie" | ||
| cli "gopkg.in/urfave/cli.v1" | ||
| ) | ||
|
|
||
| var ( | ||
| snapshotCommand = cli.Command{ | ||
| Name: "snapshot", | ||
| Usage: "A set of commands based on the snapshot", | ||
| Category: "MISCELLANEOUS COMMANDS", | ||
| Description: "", | ||
| Subcommands: []cli.Command{ | ||
| { | ||
| Name: "prune-state", | ||
| Usage: "Prune stale ethereum state data based on snapshot", | ||
| ArgsUsage: "<root>", | ||
| Action: utils.MigrateFlags(pruneState), | ||
| Category: "MISCELLANEOUS COMMANDS", | ||
| Flags: []cli.Flag{ | ||
| utils.DataDirFlag, | ||
| utils.RopstenFlag, | ||
| utils.RinkebyFlag, | ||
| utils.GoerliFlag, | ||
| utils.LegacyTestnetFlag, | ||
| }, | ||
| Description: ` | ||
| geth snapshot prune-state <state-root> | ||
| will prune historical state data with the help of state snapshot. | ||
| All trie nodes that do not belong to the specified version state | ||
| will be deleted from the database. | ||
| `, | ||
| }, | ||
| { | ||
| Name: "verify-state", | ||
| Usage: "Recalculate state hash based on snapshot for verification", | ||
| ArgsUsage: "<root>", | ||
| Action: utils.MigrateFlags(verifyState), | ||
| Category: "MISCELLANEOUS COMMANDS", | ||
| Flags: []cli.Flag{ | ||
| utils.DataDirFlag, | ||
| utils.RopstenFlag, | ||
| utils.RinkebyFlag, | ||
| utils.GoerliFlag, | ||
| utils.LegacyTestnetFlag, | ||
| }, | ||
| Description: ` | ||
| geth snapshot verify-state <state-root> | ||
| will traverse the whole accounts and storages set based on the specified | ||
| snapshot and recalculate the root hash of state for verification. | ||
| `, | ||
| }, | ||
| { | ||
| Name: "traverse-state", | ||
| Usage: "Traverse the state with given root hash for verification", | ||
| ArgsUsage: "<root>", | ||
| Action: utils.MigrateFlags(traverseState), | ||
| Category: "MISCELLANEOUS COMMANDS", | ||
| Flags: []cli.Flag{ | ||
| utils.DataDirFlag, | ||
| utils.RopstenFlag, | ||
| utils.RinkebyFlag, | ||
| utils.GoerliFlag, | ||
| utils.LegacyTestnetFlag, | ||
| }, | ||
| Description: ` | ||
| geth snapshot traverse-state <state-root> | ||
| will traverse the whole trie from the given root and will abort if any referenced | ||
| node is missing. This command can be used for trie integrity verification. | ||
| `, | ||
| }, | ||
| { | ||
| Name: "traverse-rawstate", | ||
| Usage: "Traverse the state with given root hash for verification", | ||
| ArgsUsage: "<root>", | ||
| Action: utils.MigrateFlags(traverseRawState), | ||
| Category: "MISCELLANEOUS COMMANDS", | ||
| Flags: []cli.Flag{ | ||
| utils.DataDirFlag, | ||
| utils.RopstenFlag, | ||
| utils.RinkebyFlag, | ||
| utils.GoerliFlag, | ||
| utils.LegacyTestnetFlag, | ||
| }, | ||
| Description: ` | ||
| geth snapshot traverse-rawstate <state-root> | ||
| will traverse the whole trie from the given root and will abort if any referenced | ||
| node/code is missing. This command can be used for trie integrity verification. | ||
| It's basically identical to traverse-state, but the check granularity is smaller. | ||
| `, | ||
| }, | ||
| }, | ||
| } | ||
| ) | ||
|
|
||
| func pruneState(ctx *cli.Context) error { | ||
| stack, _ := makeConfigNode(ctx) | ||
| defer stack.Close() | ||
|
|
||
| chain, chaindb := utils.MakeChain(ctx, stack, true) | ||
| defer chaindb.Close() | ||
|
|
||
| pruner, err := pruner.NewPruner(chaindb, chain.CurrentBlock().Root(), stack.ResolvePath("")) | ||
| if err != nil { | ||
| utils.Fatalf("Failed to open snapshot tree %v", err) | ||
| } | ||
| if ctx.NArg() > 1 { | ||
| utils.Fatalf("too many arguments given") | ||
| } | ||
| var root common.Hash | ||
| if ctx.NArg() == 1 { | ||
| root = common.HexToHash(ctx.Args()[0]) | ||
| } | ||
| err = pruner.Prune(root) | ||
| if err != nil { | ||
| utils.Fatalf("Failed to prune state", "error", err) | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| func verifyState(ctx *cli.Context) error { | ||
| stack, _ := makeConfigNode(ctx) | ||
| defer stack.Close() | ||
|
|
||
| chain, chaindb := utils.MakeChain(ctx, stack, true) | ||
| defer chaindb.Close() | ||
|
|
||
| snaptree, err := snapshot.New(chaindb, trie.NewDatabase(chaindb), 256, chain.CurrentBlock().Root(), false, false) | ||
| if err != nil { | ||
| fmt.Println("Failed to open snapshot tree", "error", err) | ||
| return nil | ||
| } | ||
| if ctx.NArg() > 1 { | ||
| utils.Fatalf("too many arguments given") | ||
| } | ||
| var root = chain.CurrentBlock().Root() | ||
| if ctx.NArg() == 1 { | ||
| root = common.HexToHash(ctx.Args()[0]) | ||
| } | ||
| if err := snapshot.VerifyState(snaptree, root); err != nil { | ||
| fmt.Println("Failed to verify state", "error", err) | ||
| } else { | ||
| fmt.Println("Verified the state") | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| var ( | ||
| // emptyRoot is the known root hash of an empty trie. | ||
| emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421") | ||
|
|
||
| // emptyCode is the known hash of the empty EVM bytecode. | ||
| emptyCode = crypto.Keccak256(nil) | ||
| ) | ||
|
|
||
| // traverseState is a helper function used for pruning verification. | ||
| // Basically it just iterates the trie, ensure all nodes and assoicated | ||
| // contract codes are present. | ||
| func traverseState(ctx *cli.Context) error { | ||
| glogger := log.NewGlogHandler(log.StreamHandler(os.Stderr, log.TerminalFormat(true))) | ||
| glogger.Verbosity(log.LvlInfo) | ||
| log.Root().SetHandler(glogger) | ||
|
|
||
| stack, _ := makeConfigNode(ctx) | ||
| defer stack.Close() | ||
|
|
||
| _, chaindb := utils.MakeChain(ctx, stack, true) | ||
| defer chaindb.Close() | ||
|
|
||
| if ctx.NArg() > 1 { | ||
| log.Crit("Too many arguments given") | ||
| } | ||
| var root = rawdb.ReadSnapshotRoot(chaindb) | ||
| if ctx.NArg() == 1 { | ||
| root = common.HexToHash(ctx.Args()[0]) | ||
| } | ||
| t, err := trie.NewSecure(root, trie.NewDatabase(chaindb)) | ||
| if err != nil { | ||
| log.Crit("Failed to open trie", "root", root, "error", err) | ||
| } | ||
| var ( | ||
| accounts int | ||
| slots int | ||
| codes int | ||
| lastReport time.Time | ||
| start = time.Now() | ||
| ) | ||
| accIter := trie.NewIterator(t.NodeIterator(nil)) | ||
| for accIter.Next() { | ||
| accounts += 1 | ||
| var acc struct { | ||
| Nonce uint64 | ||
| Balance *big.Int | ||
| Root common.Hash | ||
| CodeHash []byte | ||
| } | ||
| if err := rlp.DecodeBytes(accIter.Value, &acc); err != nil { | ||
| log.Crit("Invalid account encountered during traversal", "error", err) | ||
| } | ||
| if acc.Root != emptyRoot { | ||
| storageTrie, err := trie.NewSecure(acc.Root, trie.NewDatabase(chaindb)) | ||
| if err != nil { | ||
| log.Crit("Failed to open storage trie", "root", acc.Root, "error", err) | ||
| } | ||
| storageIter := trie.NewIterator(storageTrie.NodeIterator(nil)) | ||
| for storageIter.Next() { | ||
| slots += 1 | ||
| } | ||
| if storageIter.Err != nil { | ||
| log.Crit("Failed to traverse storage trie", "root", acc.Root, "error", storageIter.Err) | ||
| } | ||
| } | ||
| if !bytes.Equal(acc.CodeHash, emptyCode) { | ||
| code := rawdb.ReadCode(chaindb, common.BytesToHash(acc.CodeHash)) | ||
| if len(code) == 0 { | ||
| log.Crit("Code is missing", "hash", common.BytesToHash(acc.CodeHash)) | ||
| } | ||
| codes += 1 | ||
| } | ||
| if time.Since(lastReport) > time.Second*8 { | ||
| log.Info("Traversing state", "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start))) | ||
| lastReport = time.Now() | ||
| } | ||
| } | ||
| if accIter.Err != nil { | ||
| log.Crit("Failed to traverse state trie", "root", root, "error", accIter.Err) | ||
| } | ||
| log.Info("State is complete", "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start))) | ||
| return nil | ||
| } | ||
|
|
||
| // traverseRawState is a helper function used for pruning verification. | ||
| // Basically it just iterates the trie, ensure all nodes and assoicated | ||
| // contract codes are present. It's basically identical to traverseState | ||
| // but it will check each trie node. | ||
| func traverseRawState(ctx *cli.Context) error { | ||
| glogger := log.NewGlogHandler(log.StreamHandler(os.Stderr, log.TerminalFormat(true))) | ||
| glogger.Verbosity(log.LvlInfo) | ||
| log.Root().SetHandler(glogger) | ||
|
|
||
| stack, _ := makeConfigNode(ctx) | ||
| defer stack.Close() | ||
|
|
||
| _, chaindb := utils.MakeChain(ctx, stack, true) | ||
| defer chaindb.Close() | ||
|
|
||
| if ctx.NArg() > 1 { | ||
| log.Crit("Too many arguments given") | ||
| } | ||
| var root = rawdb.ReadSnapshotRoot(chaindb) | ||
| if ctx.NArg() == 1 { | ||
| root = common.HexToHash(ctx.Args()[0]) | ||
| } | ||
| t, err := trie.NewSecure(root, trie.NewDatabase(chaindb)) | ||
| if err != nil { | ||
| log.Crit("Failed to open trie", "root", root, "error", err) | ||
| } | ||
| log.Info("Opened the state trie", "root", root) | ||
| var ( | ||
| nodes int | ||
| accounts int | ||
| slots int | ||
| codes int | ||
| lastReport time.Time | ||
| start = time.Now() | ||
| ) | ||
| accIter := t.NodeIterator(nil) | ||
| for accIter.Next(true) { | ||
| nodes += 1 | ||
| node := accIter.Hash() | ||
|
|
||
| if node != (common.Hash{}) { | ||
| // Check the present for non-empty hash node(embeded node doesn't | ||
| // have their own hash). | ||
| blob := rawdb.ReadTrieNode(chaindb, node) | ||
| if len(blob) == 0 { | ||
| log.Crit("Missing trie node(account)", "hash", node) | ||
| } | ||
| } | ||
| // If it's a leaf node, yes we are touching an account, | ||
| // dig into the storage trie further. | ||
| if accIter.Leaf() { | ||
| accounts += 1 | ||
| var acc struct { | ||
| Nonce uint64 | ||
| Balance *big.Int | ||
| Root common.Hash | ||
| CodeHash []byte | ||
| } | ||
| if err := rlp.DecodeBytes(accIter.LeafBlob(), &acc); err != nil { | ||
| log.Crit("Invalid account encountered during traversal", "error", err) | ||
| } | ||
| if acc.Root != emptyRoot { | ||
| storageTrie, err := trie.NewSecure(acc.Root, trie.NewDatabase(chaindb)) | ||
| if err != nil { | ||
| log.Crit("Failed to open storage trie", "root", acc.Root, "error", err) | ||
| } | ||
| storageIter := storageTrie.NodeIterator(nil) | ||
| for storageIter.Next(true) { | ||
| nodes += 1 | ||
| node := storageIter.Hash() | ||
|
|
||
| // Check the present for non-empty hash node(embeded node doesn't | ||
| // have their own hash). | ||
| if node != (common.Hash{}) { | ||
| blob := rawdb.ReadTrieNode(chaindb, node) | ||
| if len(blob) == 0 { | ||
| log.Crit("Missing trie node(storage)", "hash", node) | ||
| } | ||
| } | ||
| // Bump the counter if it's leaf node. | ||
| if storageIter.Leaf() { | ||
| slots += 1 | ||
| } | ||
| } | ||
| if storageIter.Error() != nil { | ||
| log.Crit("Failed to traverse storage trie", "root", acc.Root, "error", storageIter.Error()) | ||
| } | ||
| } | ||
| if !bytes.Equal(acc.CodeHash, emptyCode) { | ||
| code := rawdb.ReadCode(chaindb, common.BytesToHash(acc.CodeHash)) | ||
| if len(code) == 0 { | ||
| log.Crit("Code is missing", "account", common.BytesToHash(accIter.LeafKey())) | ||
| } | ||
| codes += 1 | ||
| } | ||
| if time.Since(lastReport) > time.Second*8 { | ||
| log.Info("Traversing state", "nodes", nodes, "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start))) | ||
| lastReport = time.Now() | ||
| } | ||
| } | ||
| } | ||
| if accIter.Error() != nil { | ||
| log.Crit("Failed to traverse state trie", "root", root, "error", accIter.Error()) | ||
| } | ||
| log.Info("State is complete", "nodes", nodes, "accounts", accounts, "slots", slots, "codes", codes, "elapsed", common.PrettyDuration(time.Since(start))) | ||
| return nil | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn't it be better to return an error instead of just exiting on errors? (re this location and all others)