Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 56 additions & 62 deletions trie/committer.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/rlp"
"golang.org/x/crypto/sha3"
)

Expand All @@ -33,10 +32,9 @@ const leafChanSize = 200

// leaf represents a trie leaf value
type leaf struct {
size int // size of the rlp data (estimate)
hash common.Hash // hash of rlp data
node node // the node to commit
vnodes bool // set to true if the node (possibly) contains a valueNode
size int // size of the rlp data (estimate)
hash common.Hash // hash of rlp data
node node // the node to commit
}

// committer is a type used for the trie Commit operation. A committer has some
Expand Down Expand Up @@ -74,26 +72,20 @@ func returnCommitterToPool(h *committer) {
committerPool.Put(h)
}

// commitNeeded returns 'false' if the given node is already in sync with db
func (c *committer) commitNeeded(n node) bool {
hash, dirty := n.cache()
return hash == nil || dirty
}

// commit collapses a node down into a hash node and inserts it into the database
func (c *committer) Commit(n node, db *Database) (hashNode, error) {
if db == nil {
return nil, errors.New("no db provided")
}
h, err := c.commit(n, db, true)
h, err := c.commit(n, db)
if err != nil {
return nil, err
}
return h.(hashNode), nil
}

// commit collapses a node down into a hash node and inserts it into the database
func (c *committer) commit(n node, db *Database, force bool) (node, error) {
func (c *committer) commit(n node, db *Database) (node, error) {
// if this path is clean, use available cached data
hash, dirty := n.cache()
if hash != nil && !dirty {
Expand All @@ -104,87 +96,90 @@ func (c *committer) commit(n node, db *Database, force bool) (node, error) {
case *shortNode:
// Commit child
collapsed := cn.copy()
if _, ok := cn.Val.(valueNode); !ok {
childV, err := c.commit(cn.Val, db, false)

// If the child is fullnode, recursively commit.
// Otherwise it can only be hashNode or valueNode.
if _, ok := cn.Val.(*fullNode); ok {
Comment on lines +101 to +102
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't the child be another shortnode?
Let's say you have a small trie. The root will be a shortnode, when you start inserting, you will have only shortnodes until they start getting crowded, no?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The child of a shortnode can't be the shortnode. If so, they can be compressed into a single one. Look at here https://github.com/ethereum/go-ethereum/blob/master/trie/trie.go#L299

The root will be a shortnode, when you start inserting, you will have only shortnodes until they start getting crowded

When you insert some entries, the shortnode will be converted into a fullnode, the inserted entry will be created as the shortnode.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi,
Somehow I ended up on this git email distribution list. Can somebody please remove me?? I get emails three times a day with this subject:

[ethereum/go-ethereum] trie: polish committer (#21351)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, removed.

childV, err := c.commit(cn.Val, db)
if err != nil {
return nil, err
}
collapsed.Val = childV
}
// The key needs to be copied, since we're delivering it to database
collapsed.Key = hexToCompact(cn.Key)
hashedNode := c.store(collapsed, db, force, true)
hashedNode := c.store(collapsed, db)
if hn, ok := hashedNode.(hashNode); ok {
return hn, nil
}
return collapsed, nil
case *fullNode:
hashedKids, hasVnodes, err := c.commitChildren(cn, db, force)
hashedKids, err := c.commitChildren(cn, db)
if err != nil {
return nil, err
}
collapsed := cn.copy()
collapsed.Children = hashedKids

hashedNode := c.store(collapsed, db, force, hasVnodes)
hashedNode := c.store(collapsed, db)
if hn, ok := hashedNode.(hashNode); ok {
return hn, nil
}
return collapsed, nil
case valueNode:
return c.store(cn, db, force, false), nil
// hashnodes aren't stored
case hashNode:
return cn, nil
default:
// nil, valuenode shouldn't be committed
panic(fmt.Sprintf("%T: invalid node: %v", n, n))
}
return hash, nil
}

// commitChildren commits the children of the given fullnode
func (c *committer) commitChildren(n *fullNode, db *Database, force bool) ([17]node, bool, error) {
func (c *committer) commitChildren(n *fullNode, db *Database) ([17]node, error) {
var children [17]node
var hasValueNodeChildren = false
for i, child := range n.Children {
for i := 0; i < 16; i++ {
child := n.Children[i]
if child == nil {
continue
}
hnode, err := c.commit(child, db, false)
if err != nil {
return children, false, err
// If it's the hashed child, save the hash value directly.
// Note: it's impossible that the child in range [0, 15]
// is a valuenode.
if hn, ok := child.(hashNode); ok {
children[i] = hn
continue
}
children[i] = hnode
if _, ok := hnode.(valueNode); ok {
hasValueNodeChildren = true
// Commit the child recursively and store the "hashed" value.
// Note the returned node can be some embedded nodes, so it's
// possible the type is not hashnode.
hashed, err := c.commit(child, db)
if err != nil {
return children, err
}
children[i] = hashed
}
return children, hasValueNodeChildren, nil
// For the 17th child, it's possible the type is valuenode.
if n.Children[16] != nil {
children[16] = n.Children[16]
}
return children, nil
}

// store hashes the node n and if we have a storage layer specified, it writes
// the key/value pair to it and tracks any node->child references as well as any
// node->external trie references.
func (c *committer) store(n node, db *Database, force bool, hasVnodeChildren bool) node {
func (c *committer) store(n node, db *Database) node {
// Larger nodes are replaced by their hash and stored in the database.
var (
hash, _ = n.cache()
size int
)
if hash == nil {
if vn, ok := n.(valueNode); ok {
c.tmp.Reset()
if err := rlp.Encode(&c.tmp, vn); err != nil {
panic("encode error: " + err.Error())
}
size = len(c.tmp)
if size < 32 && !force {
return n // Nodes smaller than 32 bytes are stored inside their parent
}
hash = c.makeHashNode(c.tmp)
} else {
// This was not generated - must be a small node stored in the parent
// No need to do anything here
return n
}
// This was not generated - must be a small node stored in the parent.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a substantial change. I don't understand why this is correct. Can you elaborate?

Copy link
Member Author

@rjl493456442 rjl493456442 Jul 24, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When we try to commit the trie, we will first hash the trie. This assumption can always be held.
So if the cached hash is missing, there is only one possibility: the node is too small to have the hash.

The code path I delete should never be triggered. We never calculate the hash for value directly.
We only calculate the hash for "shortNode" + "fullNode". The value is always embedded in the shortnode or the 17th child of fullnode.

// In theory we should apply the leafCall here if it's not nil(embedded
// node usually contains value). But small value(less than 32bytes) is
// not our target.
return n
} else {
// We have the hash already, estimate the RLP encoding-size of the node.
// The size is used for mem tracking, does not need to be exact
Expand All @@ -194,10 +189,9 @@ func (c *committer) store(n node, db *Database, force bool, hasVnodeChildren boo
// The leaf channel will be active only when there an active leaf-callback
if c.leafCh != nil {
c.leafCh <- &leaf{
size: size,
hash: common.BytesToHash(hash),
node: n,
vnodes: hasVnodeChildren,
size: size,
hash: common.BytesToHash(hash),
node: n,
}
} else if db != nil {
// No leaf-callback used, but there's still a database. Do serial
Expand All @@ -209,30 +203,30 @@ func (c *committer) store(n node, db *Database, force bool, hasVnodeChildren boo
return hash
}

// commitLoop does the actual insert + leaf callback for nodes
// commitLoop does the actual insert + leaf callback for nodes.
func (c *committer) commitLoop(db *Database) {
for item := range c.leafCh {
var (
hash = item.hash
size = item.size
n = item.node
hasVnodes = item.vnodes
hash = item.hash
size = item.size
n = item.node
)
// We are pooling the trie nodes into an intermediate memory cache
db.lock.Lock()
db.insert(hash, size, n)
db.lock.Unlock()
if c.onleaf != nil && hasVnodes {

if c.onleaf != nil {
switch n := n.(type) {
case *shortNode:
if child, ok := n.Val.(valueNode); ok {
c.onleaf(nil, child, hash)
}
case *fullNode:
for i := 0; i < 16; i++ {
if child, ok := n.Children[i].(valueNode); ok {
c.onleaf(nil, child, hash)
}
// For children in range [0, 15], it's impossible
// to contain valuenode. Only check the 17th child.
if n.Children[16] != nil {
c.onleaf(nil, n.Children[16].(valueNode), hash)
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions trie/hasher.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,11 @@ func returnHasherToPool(h *hasher) {
// hash collapses a node down into a hash node, also returning a copy of the
// original node initialized with the computed hash to replace the original one.
func (h *hasher) hash(n node, force bool) (hashed node, cached node) {
// We're not storing the node, just hashing, use available cached data
// Return the cached hash if it's available
if hash, _ := n.cache(); hash != nil {
return hash, n
}
// Trie not processed yet or needs storage, walk the children
// Trie not processed yet, walk the children
switch n := n.(type) {
case *shortNode:
collapsed, cached := h.hashShortNodeChildren(n)
Expand Down
5 changes: 4 additions & 1 deletion trie/trie.go
Original file line number Diff line number Diff line change
Expand Up @@ -505,13 +505,16 @@ func (t *Trie) Commit(onleaf LeafCallback) (root common.Hash, err error) {
if t.root == nil {
return emptyRoot, nil
}
// Derive the hash for all dirty nodes first. We hold the assumption
// in the following procedure that all nodes are hashed.
rootHash := t.Hash()
h := newCommitter()
defer returnCommitterToPool(h)

// Do a quick check if we really need to commit, before we spin
// up goroutines. This can happen e.g. if we load a trie for reading storage
// values, but don't write to it.
if !h.commitNeeded(t.root) {
if _, dirty := t.root.cache(); !dirty {
return rootHash, nil
}
var wg sync.WaitGroup
Expand Down
Loading