Skip to content

Commit 490a664

Browse files
committed
eth, trie: improve code comment
1 parent ebd5fc7 commit 490a664

File tree

2 files changed

+122
-130
lines changed

2 files changed

+122
-130
lines changed

eth/protocols/snap/gentrie.go

Lines changed: 114 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -25,18 +25,14 @@ import (
2525
"github.com/ethereum/go-ethereum/trie"
2626
)
2727

28-
// genTrie interface is used by the trie to generate merkle tree nodes based
29-
// on a received batch of states.
28+
// genTrie interface is used by the snap syncer to generate merkle tree nodes
29+
// based on a received batch of states.
3030
type genTrie interface {
3131
// update inserts the state into generator trie.
3232
update(key, value []byte) error
3333

34-
// commit flushes the leftover nodes produced in the trie into database.
35-
// The nodes on right boundary won't be committed unless this function
36-
// is called. The flag complete should be set to true if there are more
37-
// items on the right side.
38-
//
39-
// This function must be called before flushing database batch.
34+
// commit flushes the right boundary nodes if complete flag is true. This
35+
// function must be called before flushing the associated database batch.
4036
commit(complete bool) common.Hash
4137
}
4238

@@ -47,11 +43,11 @@ type genTrie interface {
4743
type pathTrie struct {
4844
owner common.Hash // identifier of trie owner, empty for account trie
4945
tr *trie.StackTrie // underlying raw stack trie
50-
first []byte // the path of first written node
51-
last []byte // the path of last written node
46+
first []byte // the path of first committed node by stackTrie
47+
last []byte // the path of last committed node by stackTrie
5248

53-
// Flag whether the nodes on the left boundary are skipped for committing.
54-
// If it's set, then nodes on the left boundary are regarded as incomplete
49+
// This flag indicates whether nodes on the left boundary are skipped for
50+
// committing. If set, the left boundary nodes are considered incomplete
5551
// due to potentially missing left children.
5652
skipLeftBoundary bool
5753
db ethdb.KeyValueReader
@@ -70,58 +66,52 @@ func newPathTrie(owner common.Hash, skipLeftBoundary bool, db ethdb.KeyValueRead
7066
return tr
7167
}
7268

73-
// onTrieNode is invoked whenever a new node is produced by the stackTrie.
69+
// onTrieNode is invoked whenever a new node is committed by the stackTrie.
7470
//
75-
// As the produced nodes might be incomplete if they are on the boundaries
71+
// As the committed nodes might be incomplete if they are on the boundaries
7672
// (left or right), this function has the ability to detect the incomplete
77-
// ones and filter them out for committing. Namely, only the nodes belonging
78-
// to completed subtries will be committed.
73+
// ones and filter them out for committing.
7974
//
8075
// Additionally, the assumption is made that there may exist leftover dangling
81-
// nodes in the database. This function has the ability to detect all the
82-
// dangling nodes that fall within the committed subtries (on the path covered
83-
// by internal extension nodes) and remove them from the database. This property
84-
// ensures that the entire path space is uniquely occupied by committed subtries.
76+
// nodes in the database. This function has the ability to detect the dangling
77+
// nodes that fall within the path space of committed nodes (specifically on
78+
// the path covered by internal extension nodes) and remove them from the
79+
// database. This property ensures that the entire path space is uniquely
80+
// occupied by committed nodes.
8581
//
86-
// Furthermore, all leftover dangling nodes along the path from committed tries
87-
// to the root node should be removed as well; otherwise, they might potentially
88-
// disrupt the state healing process, leaving behind an inconsistent state.
82+
// Furthermore, all leftover dangling nodes along the path from committed nodes
83+
// to the trie root (left and right boundaries) should be removed as well;
84+
// otherwise, they might potentially disrupt the state healing process.
8985
func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) {
90-
// Filter out the nodes on the left boundary if skipLeftBoundary is configured.
91-
// Nodes are considered to be on the left boundary if it's the first one
92-
// produced, or on the path of the first produced one.
86+
// Filter out the nodes on the left boundary if skipLeftBoundary is
87+
// configured. Nodes are considered to be on the left boundary if
88+
// it's the first one to be committed, or the parent/ancestor of the
89+
// first committed node.
9390
if t.skipLeftBoundary && (t.first == nil || bytes.HasPrefix(t.first, path)) {
9491
if t.first == nil {
95-
// Memorize the path of first produced node, which is regarded
92+
// Memorize the path of first committed node, which is regarded
9693
// as left boundary. Deep-copy is necessary as the path given
9794
// is volatile.
9895
t.first = append([]byte{}, path...)
9996

100-
// The position of first complete sub trie (e.g. N_4) can be determined
101-
// by the first produced node(e.g. N_1) correctly, with a branch node
102-
// (e.g. N_5) as the common parent for shared path prefix. Therefore,
103-
// the nodes along the path from root to N_1 can be regarded as left
104-
// boundary and the parent of the first complete sub trie. The leftover
105-
// dangling nodes on left boundary should be cleaned out first before
106-
// committing any node.
107-
//
108-
// +-------------+
109-
// | N_5 | parent for shared path prefix
110-
// +-------------+
111-
// /- | -\
112-
// / | [ others ]
113-
// +-----+ +-----+
114-
// First produced one | N_1 | | N_4 | First completed sub trie
115-
// +-----+ +-----+
116-
// /- -\
117-
// N-2 ... N-3
97+
// The left boundary can be uniquely determined by the first committed node
98+
// from stackTrie (e.g., N_1), as the shared path prefix between the first
99+
// two inserted state items is deterministic (the path of N_3). The path
100+
// from trie root towards the first committed node is considered the left
101+
// boundary. The potential leftover dangling nodes on left boundary should
102+
// be cleaned out.
118103
//
119-
// Nodes must be cleaned from top to bottom as it's possible the procedure
120-
// is interrupted in the middle.
104+
// +-----+
105+
// | N_3 | shared path prefix of state_1 and state_2
106+
// +-----+
107+
// /- -\
108+
// +-----+ +-----+
109+
// First committed node | N_1 | | N_2 | latest inserted node (contain state_2)
110+
// +-----+ +-----+
121111
//
122-
// The node with the path of the first produced node is not removed, as
123-
// it's a sibling of the first complete sub-trie, not the parent. There
124-
// is no reason to remove it.
112+
// The node with the path of the first committed one (e.g, N_1) is not
113+
// removed because it's a sibling of the complete nodes, not the parent
114+
// or ancestor.
125115
for i := 0; i < len(path); i++ {
126116
t.delete(path[:i], false)
127117
}
@@ -131,21 +121,21 @@ func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) {
131121
// If boundary filtering is not configured, or the node is not on the left
132122
// boundary, commit it to database.
133123
//
134-
// Note, the nodes fall within the path between extension node and its
135-
// **in disk** child must be cleaned out before committing the extension
136-
// node. This is essential in snap sync to avoid leaving dangling nodes
137-
// within this range covered by extension node which could potentially
138-
// break the state healing.
124+
// Note: If the current committed node is an extension node, then the nodes
125+
// falling within the path between itself and its standalone (not embedded
126+
// in parent) child should be cleaned out for exclusively occupy the inner
127+
// path.
139128
//
140-
// The target node is detected if its path is the prefix of last written
141-
// one and path gap is larger than one. The current node could be a full
142-
// node, or a shortNode with single byte key if the path gap is one,
143-
// unnecessary to sweep the internal path in this case.
129+
// This is essential in snap sync to avoid leaving dangling nodes within
130+
// this range covered by extension node which could potentially break the
131+
// state healing.
144132
//
145-
// Nodes must be cleaned from top to bottom, including the node with the
146-
// path of the committed extension node itself.
133+
// The extension node is detected if its path is the prefix of last written
134+
// one and path gap is larger than one. If the path gap is only one byte,
135+
// the current node could either be a full node, or a extension with single
136+
// byte key. In either case, no gaps will be left in the path.
147137
if t.last != nil && bytes.HasPrefix(t.last, path) && len(t.last)-len(path) > 1 {
148-
for i := len(path); i < len(t.last); i++ {
138+
for i := len(path) + 1; i < len(t.last); i++ {
149139
t.delete(t.last[:i], true)
150140
}
151141
}
@@ -168,37 +158,47 @@ func (t *pathTrie) write(path []byte, blob []byte) {
168158
}
169159
}
170160

171-
// delete commits the node deletion to provided database batch in path mode.
172-
func (t *pathTrie) delete(path []byte, inner bool) {
173-
if t.owner == (common.Hash{}) {
174-
if rawdb.ExistsAccountTrieNode(t.db, path) {
175-
rawdb.DeleteAccountTrieNode(t.batch, path)
176-
if inner {
177-
accountInnerDeleteGauge.Inc(1)
178-
} else {
179-
accountOuterDeleteGauge.Inc(1)
180-
}
181-
}
182-
if inner {
183-
accountInnerLookupGauge.Inc(1)
184-
} else {
185-
accountOuterLookupGauge.Inc(1)
186-
}
161+
func (t *pathTrie) deleteAccountNode(path []byte, inner bool) {
162+
if inner {
163+
accountInnerLookupGauge.Inc(1)
164+
} else {
165+
accountOuterLookupGauge.Inc(1)
166+
}
167+
if !rawdb.ExistsAccountTrieNode(t.db, path) {
187168
return
188169
}
189-
if rawdb.ExistsStorageTrieNode(t.db, t.owner, path) {
190-
rawdb.DeleteStorageTrieNode(t.batch, t.owner, path)
191-
if inner {
192-
storageInnerDeleteGauge.Inc(1)
193-
} else {
194-
storageOuterDeleteGauge.Inc(1)
195-
}
170+
if inner {
171+
accountInnerDeleteGauge.Inc(1)
172+
} else {
173+
accountOuterDeleteGauge.Inc(1)
196174
}
175+
rawdb.DeleteAccountTrieNode(t.batch, path)
176+
}
177+
178+
func (t *pathTrie) deleteStorageNode(path []byte, inner bool) {
197179
if inner {
198180
storageInnerLookupGauge.Inc(1)
199181
} else {
200182
storageOuterLookupGauge.Inc(1)
201183
}
184+
if !rawdb.ExistsStorageTrieNode(t.db, t.owner, path) {
185+
return
186+
}
187+
if inner {
188+
storageInnerDeleteGauge.Inc(1)
189+
} else {
190+
storageOuterDeleteGauge.Inc(1)
191+
}
192+
rawdb.DeleteStorageTrieNode(t.batch, t.owner, path)
193+
}
194+
195+
// delete commits the node deletion to provided database batch in path mode.
196+
func (t *pathTrie) delete(path []byte, inner bool) {
197+
if t.owner == (common.Hash{}) {
198+
t.deleteAccountNode(path, inner)
199+
} else {
200+
t.deleteStorageNode(path, inner)
201+
}
202202
}
203203

204204
// update implements genTrie interface, inserting a (key, value) pair into the
@@ -208,8 +208,8 @@ func (t *pathTrie) update(key, value []byte) error {
208208
}
209209

210210
// commit implements genTrie interface, flushing the right boundary if it's
211-
// regarded as complete. Otherwise, the nodes on the right boundary are discarded
212-
// and cleaned up.
211+
// considered as complete. Otherwise, the nodes on the right boundary are
212+
// discarded and cleaned up.
213213
//
214214
// Note, this function must be called before flushing database batch, otherwise,
215215
// dangling nodes might be left in database.
@@ -218,51 +218,43 @@ func (t *pathTrie) commit(complete bool) common.Hash {
218218
// The nodes on both left and right boundary will still be filtered
219219
// out if left boundary filtering is configured.
220220
if complete {
221-
// The produced hash is meaningless if left side is incomplete
221+
// Commit all inserted but not yet committed nodes(on the right
222+
// boundary) in the stackTrie.
223+
hash := t.tr.Hash()
222224
if t.skipLeftBoundary {
223-
return common.Hash{}
225+
return common.Hash{} // hash is meaningless if left side is incomplete
224226
}
225-
return t.tr.Hash()
227+
return hash
226228
}
227-
// If the right boundary is claimed as incomplete, the uncommitted
228-
// nodes should be discarded, as they might be incomplete due to
229-
// missing children on the right side. Furthermore, previously committed
230-
// nodes can be the children of the right boundary nodes; therefore,
231-
// the nodes of the right boundary must be cleaned out!
229+
// Discard nodes on the right boundary as it's claimed as incomplete. These
230+
// nodes might be incomplete due to missing children on the right side.
231+
// Furthermore, the potential leftover nodes on right boundary should also
232+
// be cleaned out.
232233
//
233-
// The position of the last complete sub-trie (e.g., N_1) can be correctly
234-
// determined by the last produced node (e.g., N_4), with a branch node
235-
// (e.g., N_5) as the common parent for the shared path prefix. Therefore,
236-
// the nodes along the path from the root to N_4 can be regarded as the
237-
// right boundary and the parent of the last complete subtrie.
234+
// The right boundary can be uniquely determined by the last committed node
235+
// from stackTrie (e.g., N_1), as the shared path prefix between the last
236+
// two inserted state items is deterministic (the path of N_3). The path
237+
// from trie root towards the last committed node is considered the right
238+
// boundary (root to N_3).
238239
//
239-
// +-----+
240-
// | N_5 | parent for shared path prefix
241-
// +-----+
242-
// /- -\
243-
// +-----+ +-----+
244-
// Last complete subtrie | N_1 | | N_4 | Last produced node
245-
// +-----+ +-----+
246-
// /- -\
247-
// N-2 .. N-3
240+
// +-----+
241+
// | N_3 | shared path prefix of last two states
242+
// +-----+
243+
// /- -\
244+
// +-----+ +-----+
245+
// Last committed node | N_1 | | N_2 | latest inserted node (contain last state)
246+
// +-----+ +-----+
248247
//
249248
// Another interesting scenario occurs when the trie is committed due to
250249
// too many items being accumulated in the batch. To flush them out to
251-
// the database, the path of the last inserted item is temporarily treated
252-
// as an incomplete right boundary, and nodes on this path are removed.
253-
//
250+
// the database, the path of the last inserted node (N_2) is temporarily
251+
// treated as an incomplete right boundary, and nodes on this path are
252+
// removed (e.g. from root to N_3).
254253
// However, this path will be reclaimed as an internal path by inserting
255-
// more items after the batch flush. Newly produced nodes on this path
256-
// can be committed with no issues as they are actually complete (also
257-
// from a database perspective, first deleting and then rewriting is
258-
// still a valid data update).
259-
//
260-
// Nodes must be cleaned from top to bottom as it's possible the procedure
261-
// is interrupted in the middle.
254+
// more items after the batch flush. New nodes on this path can be committed
255+
// with no issues as they are actually complete. Also, from a database
256+
// perspective, first deleting and then rewriting is a valid data update.
262257
for i := 0; i < len(t.last); i++ {
263-
// The node with the path of the last produced node is not removed, as
264-
// it's a sibling of the last complete sub-trie, not the parent. There
265-
// is no reason to remove it.
266258
t.delete(t.last[:i], false)
267259
}
268260
return common.Hash{} // the hash is meaningless for incomplete commit

trie/stacktrie.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,13 @@ var (
3030
_ = types.TrieHasher((*StackTrie)(nil))
3131
)
3232

33-
// OnTrieNode is a callback method to invoke when a trie node is produced
34-
// by the stack trie.
33+
// OnTrieNode is a callback method invoked when a trie node is committed
34+
// by the stack trie. The node is only committed if it's considered complete.
3535
//
3636
// The caller should not modify the contents of the returned path and blob
37-
// slice, and their contents may change after the call. It is up to the
38-
// `onTrieNode` receiver function to deep-copy the data if it wants to
39-
// retain it after the call ends.
37+
// slice, and their contents may be changed after the call. It is up to the
38+
// `onTrieNode` receiver function to deep-copy the data if it wants to retain
39+
// it after the call ends.
4040
type OnTrieNode func(path []byte, hash common.Hash, blob []byte)
4141

4242
// StackTrie is a trie implementation that expects keys to be inserted
@@ -49,8 +49,8 @@ type StackTrie struct {
4949
onTrieNode OnTrieNode
5050
}
5151

52-
// NewStackTrie allocates and initializes an empty trie. The produced nodes will
53-
// be discarded immediately if no callback is provided.
52+
// NewStackTrie allocates and initializes an empty trie. The committed nodes
53+
// will be discarded immediately if no callback is configured.
5454
func NewStackTrie(onTrieNode OnTrieNode) *StackTrie {
5555
return &StackTrie{
5656
root: stPool.Get().(*stNode),
@@ -374,7 +374,7 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
374374

375375
// Invoke the callback it's provided. Notably, the path and blob slices are
376376
// volatile, please deep-copy the slices in callback if the contents need
377-
// to be saved.
377+
// to be retained.
378378
if t.onTrieNode != nil {
379379
t.onTrieNode(path, common.BytesToHash(st.val), blob)
380380
}

0 commit comments

Comments
 (0)