@@ -25,18 +25,14 @@ import (
2525 "github.com/ethereum/go-ethereum/trie"
2626)
2727
28- // genTrie interface is used by the trie to generate merkle tree nodes based
29- // on a received batch of states.
28+ // genTrie interface is used by the snap syncer to generate merkle tree nodes
29+ // based on a received batch of states.
3030type genTrie interface {
3131 // update inserts the state into generator trie.
3232 update (key , value []byte ) error
3333
34- // commit flushes the leftover nodes produced in the trie into database.
35- // The nodes on right boundary won't be committed unless this function
36- // is called. The flag complete should be set to true if there are more
37- // items on the right side.
38- //
39- // This function must be called before flushing database batch.
34+ // commit flushes the right boundary nodes if complete flag is true. This
35+ // function must be called before flushing the associated database batch.
4036 commit (complete bool ) common.Hash
4137}
4238
@@ -47,11 +43,11 @@ type genTrie interface {
4743type pathTrie struct {
4844 owner common.Hash // identifier of trie owner, empty for account trie
4945 tr * trie.StackTrie // underlying raw stack trie
50- first []byte // the path of first written node
51- last []byte // the path of last written node
46+ first []byte // the path of first committed node by stackTrie
47+ last []byte // the path of last committed node by stackTrie
5248
53- // Flag whether the nodes on the left boundary are skipped for committing.
54- // If it's set, then nodes on the left boundary are regarded as incomplete
49+ // This flag indicates whether nodes on the left boundary are skipped for
50+ // committing. If set, the left boundary nodes are considered incomplete
5551 // due to potentially missing left children.
5652 skipLeftBoundary bool
5753 db ethdb.KeyValueReader
@@ -70,58 +66,52 @@ func newPathTrie(owner common.Hash, skipLeftBoundary bool, db ethdb.KeyValueRead
7066 return tr
7167}
7268
73- // onTrieNode is invoked whenever a new node is produced by the stackTrie.
69+ // onTrieNode is invoked whenever a new node is committed by the stackTrie.
7470//
75- // As the produced nodes might be incomplete if they are on the boundaries
71+ // As the committed nodes might be incomplete if they are on the boundaries
7672// (left or right), this function has the ability to detect the incomplete
77- // ones and filter them out for committing. Namely, only the nodes belonging
78- // to completed subtries will be committed.
73+ // ones and filter them out for committing.
7974//
8075// Additionally, the assumption is made that there may exist leftover dangling
81- // nodes in the database. This function has the ability to detect all the
82- // dangling nodes that fall within the committed subtries (on the path covered
83- // by internal extension nodes) and remove them from the database. This property
84- // ensures that the entire path space is uniquely occupied by committed subtries.
76+ // nodes in the database. This function has the ability to detect the dangling
77+ // nodes that fall within the path space of committed nodes (specifically on
78+ // the path covered by internal extension nodes) and remove them from the
79+ // database. This property ensures that the entire path space is uniquely
80+ // occupied by committed nodes.
8581//
86- // Furthermore, all leftover dangling nodes along the path from committed tries
87- // to the root node should be removed as well; otherwise, they might potentially
88- // disrupt the state healing process, leaving behind an inconsistent state .
82+ // Furthermore, all leftover dangling nodes along the path from committed nodes
83+ // to the trie root (left and right boundaries) should be removed as well;
84+ // otherwise, they might potentially disrupt the state healing process.
8985func (t * pathTrie ) onTrieNode (path []byte , hash common.Hash , blob []byte ) {
90- // Filter out the nodes on the left boundary if skipLeftBoundary is configured.
91- // Nodes are considered to be on the left boundary if it's the first one
92- // produced, or on the path of the first produced one.
86+ // Filter out the nodes on the left boundary if skipLeftBoundary is
87+ // configured. Nodes are considered to be on the left boundary if
88+ // it's the first one to be committed, or the parent/ancestor of the
89+ // first committed node.
9390 if t .skipLeftBoundary && (t .first == nil || bytes .HasPrefix (t .first , path )) {
9491 if t .first == nil {
95- // Memorize the path of first produced node, which is regarded
92+ // Memorize the path of first committed node, which is regarded
9693 // as left boundary. Deep-copy is necessary as the path given
9794 // is volatile.
9895 t .first = append ([]byte {}, path ... )
9996
100- // The position of first complete sub trie (e.g. N_4) can be determined
101- // by the first produced node(e.g. N_1) correctly, with a branch node
102- // (e.g. N_5) as the common parent for shared path prefix. Therefore,
103- // the nodes along the path from root to N_1 can be regarded as left
104- // boundary and the parent of the first complete sub trie. The leftover
105- // dangling nodes on left boundary should be cleaned out first before
106- // committing any node.
107- //
108- // +-------------+
109- // | N_5 | parent for shared path prefix
110- // +-------------+
111- // /- | -\
112- // / | [ others ]
113- // +-----+ +-----+
114- // First produced one | N_1 | | N_4 | First completed sub trie
115- // +-----+ +-----+
116- // /- -\
117- // N-2 ... N-3
97+ // The left boundary can be uniquely determined by the first committed node
98+ // from stackTrie (e.g., N_1), as the shared path prefix between the first
99+ // two inserted state items is deterministic (the path of N_3). The path
100+ // from trie root towards the first committed node is considered the left
101+ // boundary. The potential leftover dangling nodes on left boundary should
102+ // be cleaned out.
118103 //
119- // Nodes must be cleaned from top to bottom as it's possible the procedure
120- // is interrupted in the middle.
104+ // +-----+
105+ // | N_3 | shared path prefix of state_1 and state_2
106+ // +-----+
107+ // /- -\
108+ // +-----+ +-----+
109+ // First committed node | N_1 | | N_2 | latest inserted node (contain state_2)
110+ // +-----+ +-----+
121111 //
122- // The node with the path of the first produced node is not removed, as
123- // it's a sibling of the first complete sub-trie , not the parent. There
124- // is no reason to remove it .
112+ // The node with the path of the first committed one (e.g, N_1) is not
113+ // removed because it's a sibling of the complete nodes , not the parent
114+ // or ancestor .
125115 for i := 0 ; i < len (path ); i ++ {
126116 t .delete (path [:i ], false )
127117 }
@@ -131,21 +121,21 @@ func (t *pathTrie) onTrieNode(path []byte, hash common.Hash, blob []byte) {
131121 // If boundary filtering is not configured, or the node is not on the left
132122 // boundary, commit it to database.
133123 //
134- // Note, the nodes fall within the path between extension node and its
135- // **in disk** child must be cleaned out before committing the extension
136- // node. This is essential in snap sync to avoid leaving dangling nodes
137- // within this range covered by extension node which could potentially
138- // break the state healing.
124+ // Note: If the current committed node is an extension node, then the nodes
125+ // falling within the path between itself and its standalone (not embedded
126+ // in parent) child should be cleaned out for exclusively occupy the inner
127+ // path.
139128 //
140- // The target node is detected if its path is the prefix of last written
141- // one and path gap is larger than one. The current node could be a full
142- // node, or a shortNode with single byte key if the path gap is one,
143- // unnecessary to sweep the internal path in this case.
129+ // This is essential in snap sync to avoid leaving dangling nodes within
130+ // this range covered by extension node which could potentially break the
131+ // state healing.
144132 //
145- // Nodes must be cleaned from top to bottom, including the node with the
146- // path of the committed extension node itself.
133+ // The extension node is detected if its path is the prefix of last written
134+ // one and path gap is larger than one. If the path gap is only one byte,
135+ // the current node could either be a full node, or a extension with single
136+ // byte key. In either case, no gaps will be left in the path.
147137 if t .last != nil && bytes .HasPrefix (t .last , path ) && len (t .last )- len (path ) > 1 {
148- for i := len (path ); i < len (t .last ); i ++ {
138+ for i := len (path ) + 1 ; i < len (t .last ); i ++ {
149139 t .delete (t .last [:i ], true )
150140 }
151141 }
@@ -168,37 +158,47 @@ func (t *pathTrie) write(path []byte, blob []byte) {
168158 }
169159}
170160
171- // delete commits the node deletion to provided database batch in path mode.
172- func (t * pathTrie ) delete (path []byte , inner bool ) {
173- if t .owner == (common.Hash {}) {
174- if rawdb .ExistsAccountTrieNode (t .db , path ) {
175- rawdb .DeleteAccountTrieNode (t .batch , path )
176- if inner {
177- accountInnerDeleteGauge .Inc (1 )
178- } else {
179- accountOuterDeleteGauge .Inc (1 )
180- }
181- }
182- if inner {
183- accountInnerLookupGauge .Inc (1 )
184- } else {
185- accountOuterLookupGauge .Inc (1 )
186- }
161+ func (t * pathTrie ) deleteAccountNode (path []byte , inner bool ) {
162+ if inner {
163+ accountInnerLookupGauge .Inc (1 )
164+ } else {
165+ accountOuterLookupGauge .Inc (1 )
166+ }
167+ if ! rawdb .ExistsAccountTrieNode (t .db , path ) {
187168 return
188169 }
189- if rawdb .ExistsStorageTrieNode (t .db , t .owner , path ) {
190- rawdb .DeleteStorageTrieNode (t .batch , t .owner , path )
191- if inner {
192- storageInnerDeleteGauge .Inc (1 )
193- } else {
194- storageOuterDeleteGauge .Inc (1 )
195- }
170+ if inner {
171+ accountInnerDeleteGauge .Inc (1 )
172+ } else {
173+ accountOuterDeleteGauge .Inc (1 )
196174 }
175+ rawdb .DeleteAccountTrieNode (t .batch , path )
176+ }
177+
178+ func (t * pathTrie ) deleteStorageNode (path []byte , inner bool ) {
197179 if inner {
198180 storageInnerLookupGauge .Inc (1 )
199181 } else {
200182 storageOuterLookupGauge .Inc (1 )
201183 }
184+ if ! rawdb .ExistsStorageTrieNode (t .db , t .owner , path ) {
185+ return
186+ }
187+ if inner {
188+ storageInnerDeleteGauge .Inc (1 )
189+ } else {
190+ storageOuterDeleteGauge .Inc (1 )
191+ }
192+ rawdb .DeleteStorageTrieNode (t .batch , t .owner , path )
193+ }
194+
195+ // delete commits the node deletion to provided database batch in path mode.
196+ func (t * pathTrie ) delete (path []byte , inner bool ) {
197+ if t .owner == (common.Hash {}) {
198+ t .deleteAccountNode (path , inner )
199+ } else {
200+ t .deleteStorageNode (path , inner )
201+ }
202202}
203203
204204// update implements genTrie interface, inserting a (key, value) pair into the
@@ -208,8 +208,8 @@ func (t *pathTrie) update(key, value []byte) error {
208208}
209209
210210// commit implements genTrie interface, flushing the right boundary if it's
211- // regarded as complete. Otherwise, the nodes on the right boundary are discarded
212- // and cleaned up.
211+ // considered as complete. Otherwise, the nodes on the right boundary are
212+ // discarded and cleaned up.
213213//
214214// Note, this function must be called before flushing database batch, otherwise,
215215// dangling nodes might be left in database.
@@ -218,51 +218,43 @@ func (t *pathTrie) commit(complete bool) common.Hash {
218218 // The nodes on both left and right boundary will still be filtered
219219 // out if left boundary filtering is configured.
220220 if complete {
221- // The produced hash is meaningless if left side is incomplete
221+ // Commit all inserted but not yet committed nodes(on the right
222+ // boundary) in the stackTrie.
223+ hash := t .tr .Hash ()
222224 if t .skipLeftBoundary {
223- return common.Hash {}
225+ return common.Hash {} // hash is meaningless if left side is incomplete
224226 }
225- return t . tr . Hash ()
227+ return hash
226228 }
227- // If the right boundary is claimed as incomplete, the uncommitted
228- // nodes should be discarded, as they might be incomplete due to
229- // missing children on the right side. Furthermore, previously committed
230- // nodes can be the children of the right boundary nodes; therefore,
231- // the nodes of the right boundary must be cleaned out!
229+ // Discard nodes on the right boundary as it's claimed as incomplete. These
230+ // nodes might be incomplete due to missing children on the right side.
231+ // Furthermore, the potential leftover nodes on right boundary should also
232+ // be cleaned out.
232233 //
233- // The position of the last complete sub-trie (e.g., N_1) can be correctly
234- // determined by the last produced node (e.g., N_4 ), with a branch node
235- // (e.g., N_5) as the common parent for the shared path prefix. Therefore,
236- // the nodes along the path from the root to N_4 can be regarded as the
237- // right boundary and the parent of the last complete subtrie .
234+ // The right boundary can be uniquely determined by the last committed node
235+ // from stackTrie (e.g., N_1 ), as the shared path prefix between the last
236+ // two inserted state items is deterministic ( the path of N_3). The path
237+ // from trie root towards the last committed node is considered the right
238+ // boundary (root to N_3) .
238239 //
239- // +-----+
240- // | N_5 | parent for shared path prefix
241- // +-----+
242- // /- -\
243- // +-----+ +-----+
244- // Last complete subtrie | N_1 | | N_4 | Last produced node
245- // +-----+ +-----+
246- // /- -\
247- // N-2 .. N-3
240+ // +-----+
241+ // | N_3 | shared path prefix of last two states
242+ // +-----+
243+ // /- -\
244+ // +-----+ +-----+
245+ // Last committed node | N_1 | | N_2 | latest inserted node (contain last state)
246+ // +-----+ +-----+
248247 //
249248 // Another interesting scenario occurs when the trie is committed due to
250249 // too many items being accumulated in the batch. To flush them out to
251- // the database, the path of the last inserted item is temporarily treated
252- // as an incomplete right boundary, and nodes on this path are removed.
253- //
250+ // the database, the path of the last inserted node (N_2) is temporarily
251+ // treated as an incomplete right boundary, and nodes on this path are
252+ // removed (e.g. from root to N_3).
254253 // However, this path will be reclaimed as an internal path by inserting
255- // more items after the batch flush. Newly produced nodes on this path
256- // can be committed with no issues as they are actually complete (also
257- // from a database perspective, first deleting and then rewriting is
258- // still a valid data update).
259- //
260- // Nodes must be cleaned from top to bottom as it's possible the procedure
261- // is interrupted in the middle.
254+ // more items after the batch flush. New nodes on this path can be committed
255+ // with no issues as they are actually complete. Also, from a database
256+ // perspective, first deleting and then rewriting is a valid data update.
262257 for i := 0 ; i < len (t .last ); i ++ {
263- // The node with the path of the last produced node is not removed, as
264- // it's a sibling of the last complete sub-trie, not the parent. There
265- // is no reason to remove it.
266258 t .delete (t .last [:i ], false )
267259 }
268260 return common.Hash {} // the hash is meaningless for incomplete commit
0 commit comments