Skip to content

Commit 744884b

Browse files
committed
go/oasis-node/cmd/storage: Bootstrap off-chain checkpoint sync
1 parent 983307e commit 744884b

2 files changed

Lines changed: 260 additions & 31 deletions

File tree

go/oasis-node/cmd/storage/checkpoint.go

Lines changed: 253 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,18 @@ import (
88
"path/filepath"
99

1010
cmtCfg "github.com/cometbft/cometbft/config"
11-
cmtNode "github.com/cometbft/cometbft/node"
11+
cmtProtoState "github.com/cometbft/cometbft/proto/tendermint/state"
12+
cmtProto "github.com/cometbft/cometbft/proto/tendermint/types"
13+
cmtState "github.com/cometbft/cometbft/state"
14+
"github.com/cometbft/cometbft/store"
15+
"github.com/cometbft/cometbft/types"
16+
cmttypes "github.com/cometbft/cometbft/types"
17+
"github.com/cometbft/cometbft/version"
18+
"github.com/cosmos/gogoproto/proto"
1219
"github.com/spf13/cobra"
1320

1421
"github.com/oasisprotocol/oasis-core/go/common"
15-
"github.com/oasisprotocol/oasis-core/go/config"
16-
cmtCommon "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/common"
17-
cmtDB "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/db"
22+
"github.com/oasisprotocol/oasis-core/go/common/cbor"
1823
cmdCommon "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common"
1924
"github.com/oasisprotocol/oasis-core/go/storage/mkvs/checkpoint"
2025
"github.com/oasisprotocol/oasis-core/go/storage/mkvs/db/api"
@@ -24,8 +29,15 @@ import (
2429
const (
	// consensusSubdir is the subdirectory of the checkpoint output directory
	// that holds the consensus checkpoint.
	consensusSubdir = "consensus"
	// runtimesSubdir is presumably the subdirectory holding the runtime
	// checkpoints (its use is not visible here; verify against callers).
	runtimesSubdir = "runtimes"

	// consensusMetaFilename is the name of the CBOR file, stored inside the
	// consensus checkpoint directory, that carries the bootstrap metadata.
	consensusMetaFilename = "bootstrap.cbor"
)
2835

36+
// bootstrapMeta is the metadata stored alongside a consensus checkpoint. It
// is written CBOR-encoded when a checkpoint is created and read back when a
// checkpoint is restored in order to bootstrap the CometBFT databases.
type bootstrapMeta struct {
	// State is the protobuf-encoded CometBFT consensus state.
	State []byte `json:"state"`
	// Commit is the protobuf-encoded CometBFT commit.
	Commit []byte `json:"commit"`
}
40+
2941
func newCheckpointCmd() *cobra.Command {
3042
cmd := &cobra.Command{
3143
Use: "checkpoint",
@@ -72,7 +84,13 @@ func newCreateCmd() *cobra.Command {
7284
defer close()
7385

7486
consensusOutDir := filepath.Join(outDir, consensusSubdir)
75-
return createConsensusCheckpoint(cmd.Context(), ndb, height, consensusOutDir)
87+
if err := createConsensusCheckpoint(cmd.Context(), ndb, height, consensusOutDir); err != nil {
88+
return err
89+
}
90+
if err := writeConsensusBootstrap(cmd.Context(), cmdCommon.DataDir(), ndb, height, consensusOutDir); err != nil {
91+
return fmt.Errorf("failed to write bootstrap metadata: %w", err)
92+
}
93+
return nil
7694
}
7795

7896
createRuntimeCps := func() error {
@@ -182,8 +200,8 @@ func createRuntimeCheckpoints(ctx context.Context, ndb api.NodeDB, round uint64,
182200
if err != nil {
183201
return fmt.Errorf("failed to get roots for round %d: %w", round, err)
184202
}
185-
if lenRoots := len(roots); lenRoots != 2 {
186-
return fmt.Errorf("unexpected number of roots: got %d, want %d", lenRoots, 2)
203+
if lenRoots := len(roots); 0 == lenRoots || lenRoots > 2 { // Empty IO root is implicitly present.
204+
return fmt.Errorf("unexpected number of roots: got %d", lenRoots)
187205
}
188206
return createCheckpoints(ctx, ndb, roots, outputDir)
189207
}
@@ -204,6 +222,67 @@ func createCheckpoints(ctx context.Context, ndb api.NodeDB, roots []node.Root, o
204222
return nil
205223
}
206224

225+
func writeConsensusBootstrap(ctx context.Context, dataDir string, ndb api.NodeDB, height uint64, outputDir string) error {
226+
227+
stateStore, err := openConsensusStatestore(dataDir)
228+
if err != nil {
229+
return fmt.Errorf("failed to open cometbft state store: %w", err)
230+
}
231+
defer stateStore.Close()
232+
233+
blockStore, err := openConsensusBlockstore(dataDir)
234+
if err != nil {
235+
return fmt.Errorf("failed to open consensus blockstore: %w", err)
236+
}
237+
defer blockStore.Close()
238+
239+
state, err := State(ctx, height, stateStore, blockStore)
240+
if err != nil {
241+
return fmt.Errorf("failed to load consensus state at height %d: %w", height, err)
242+
}
243+
statePB, err := state.ToProto()
244+
if err != nil {
245+
return fmt.Errorf("failed to convert consensus state to proto: %w", err)
246+
}
247+
stateBytes, err := proto.Marshal(statePB)
248+
if err != nil {
249+
return fmt.Errorf("failed to marshal consensus state: %w", err)
250+
}
251+
252+
commit, err := Commit(ctx, blockStore, height)
253+
if err != nil {
254+
return fmt.Errorf("failed to load consensus commit at height %d: %w", height, err)
255+
}
256+
commitBytes, err := proto.Marshal(commit.ToProto())
257+
if err != nil {
258+
return fmt.Errorf("failed to marshal consensus commit: %w", err)
259+
}
260+
261+
meta := bootstrapMeta{
262+
State: stateBytes,
263+
Commit: commitBytes,
264+
}
265+
if err := os.WriteFile(filepath.Join(outputDir, consensusMetaFilename), cbor.Marshal(meta), 0o600); err != nil {
266+
return fmt.Errorf("failed to write bootstrap metadata: %w", err)
267+
}
268+
269+
return nil
270+
}
271+
272+
func readConsensusBootstrap(inputDir string) (bootstrapMeta, error) {
273+
data, err := os.ReadFile(filepath.Join(inputDir, consensusMetaFilename))
274+
if err != nil {
275+
return bootstrapMeta{}, err
276+
}
277+
278+
var meta bootstrapMeta
279+
if err := cbor.Unmarshal(data, &meta); err != nil {
280+
return bootstrapMeta{}, fmt.Errorf("failed to decode bootstrap metadata: %w", err)
281+
}
282+
283+
return meta, nil
284+
}
285+
207286
func restoreConsensusCp(ctx context.Context, dataDir, inputDir string) error {
208287
ndb, close, err := openConsensusNodeDB(cmdCommon.DataDir())
209288
if err != nil {
@@ -233,31 +312,12 @@ func restoreConsensusCp(ctx context.Context, dataDir, inputDir string) error {
233312
return fmt.Errorf("failed to restore checkpoint: %w", err)
234313
}
235314

236-
// TODO: This is just to show how CometBFT is meant to synchronize block and state stores
237-
// when the state sync is done offline (aka import checkpoint). Obviously we use our custom
238-
// genesis/doc provider and light clients. In theory this shows we could make bootstrap of
239-
// oasis node from the checkpoints completely trustless!
240-
//
241-
// In practice I plan to write our own version of `BootstrapUntrustedState`, where the node
242-
// creating checkpoints, so also dump bootstrap metadata.
243-
cmtConfig := cmtCfg.DefaultConfig()
244-
cmtConfig.SetRoot(filepath.Join(cmdCommon.DataDir(), cmtCommon.StateDir))
245-
dbProvider, err := cmtDB.Provider()
315+
meta, err := readConsensusBootstrap(inputDir)
246316
if err != nil {
247-
return fmt.Errorf("failed to obtain db provider: %w", err)
248-
}
249-
cmtConfig.Genesis = config.GlobalConfig.Genesis.File
250-
cmtConfig.StateSync.RPCServers = config.GlobalConfig.Consensus.Providers
251-
cmtConfig.StateSync.TrustPeriod = config.GlobalConfig.Consensus.LightClient.Trust.Period
252-
cmtConfig.StateSync.TrustHeight = int64(config.GlobalConfig.Consensus.LightClient.Trust.Height)
253-
cmtConfig.StateSync.TrustHash = config.GlobalConfig.Consensus.LightClient.Trust.Hash
254-
255-
root := cps[0].Root
256-
if err := cmtNode.BootstrapState(ctx, cmtConfig, dbProvider, root.Version, root.Hash[:]); err != nil {
257-
return fmt.Errorf("failed to bootstrap cometbft dbs from the state DB: %w", err)
317+
return fmt.Errorf("failed to read bootstrap metadata: %w", err)
258318
}
259319

260-
return nil
320+
return bootstrapTrustedState(ctx, dataDir, meta)
261321
}
262322

263323
func restoreRuntimeCps(ctx context.Context, inputDir, namespace string) error {
@@ -322,3 +382,167 @@ func restoreCheckpoints(ctx context.Context, provider checkpoint.ChunkProvider,
322382

323383
return nil
324384
}
385+
386+
// bootstrapTrustedState synchronizes the cometbft databases after the state sync
387+
// has been performed offline.
388+
//
389+
// It is expected that the block store and state store are empty at the time the
390+
// function is called.
391+
//
392+
// Adapted from https://github.com/oasisprotocol/cometbft/blob/08e22df73d354512fc27bd0c5731b3dcf1f8fef7/node/node.go#L198.
393+
func bootstrapTrustedState(ctx context.Context, dataDir string, meta bootstrapMeta) error {
394+
stateDB, err := openConsensusStateDB(dataDir)
395+
if err != nil {
396+
return fmt.Errorf("failed to open cometbft state store: %w", err)
397+
}
398+
defer stateDB.Close()
399+
400+
blockStore, err := openConsensusBlockstore(dataDir)
401+
if err != nil {
402+
return fmt.Errorf("failed to open consensus blockstore: %w", err)
403+
}
404+
defer blockStore.Close()
405+
406+
if !blockStore.IsEmpty() {
407+
return fmt.Errorf("blockstore not empty, trying to initialize non empty state")
408+
}
409+
410+
stateStore := cmtState.NewBootstrapStore(stateDB, cmtState.StoreOptions{
411+
DiscardABCIResponses: cmtCfg.DefaultConfig().Storage.DiscardABCIResponses,
412+
})
413+
defer stateStore.Close()
414+
415+
state, err := stateStore.Load()
416+
if err != nil {
417+
return err
418+
}
419+
420+
if !state.IsEmpty() {
421+
return fmt.Errorf("state not empty, trying to initialize non empty state")
422+
}
423+
424+
var statePB cmtProtoState.State
425+
if err := proto.Unmarshal(meta.State, &statePB); err != nil {
426+
return fmt.Errorf("failed to unmarshal consensus state: %w", err)
427+
}
428+
metaState, err := cmtState.FromProto(&statePB)
429+
if err != nil {
430+
return fmt.Errorf("failed to parse consensus state: %w", err)
431+
}
432+
433+
var commitPB cmtProto.Commit
434+
if err := proto.Unmarshal(meta.Commit, &commitPB); err != nil {
435+
return fmt.Errorf("failed to unmarshal consensus commit: %w", err)
436+
}
437+
commit, err := cmttypes.CommitFromProto(&commitPB)
438+
if err != nil {
439+
return fmt.Errorf("failed to parse consensus commit: %w", err)
440+
}
441+
442+
if err = stateStore.Bootstrap(*metaState); err != nil {
443+
return err
444+
}
445+
446+
err = blockStore.SaveSeenCommit(metaState.LastBlockHeight, commit)
447+
if err != nil {
448+
return err
449+
}
450+
451+
// Once the stores are bootstrapped, we need to set the height at which the node has finished
452+
// statesyncing. This will allow the blocksync reactor to fetch blocks at a proper height.
453+
// In case this operation fails, it is equivalent to a failure in online state sync where the operator
454+
// needs to manually delete the state and blockstores and rerun the bootstrapping process.
455+
err = stateStore.SetOfflineStateSyncHeight(metaState.LastBlockHeight)
456+
if err != nil {
457+
return fmt.Errorf("failed to set synced height: %w", err)
458+
}
459+
460+
return err
461+
}
462+
463+
// Commit is adapted and simplified and mimics StateProvider behaviour used in the upstream BootstrapState.
464+
func Commit(ctx context.Context, blockStore *store.BlockStore, height uint64) (*types.Commit, error) {
465+
commit := blockStore.LoadBlockCommit(int64(height))
466+
if commit == nil {
467+
return nil, fmt.Errorf("commit not found at height %d", height)
468+
}
469+
return commit, nil
470+
}
471+
472+
// State is adapted and mimics StateProvider behaviour used in the upstream BootstrapState.
473+
func State(ctx context.Context, height uint64, stateStore cmtState.Store, blockStore *store.BlockStore) (cmtState.State, error) {
474+
475+
// The snapshot height maps onto the state heights as follows:
476+
//
477+
// height: last block, i.e. the snapshotted height
478+
// height+1: current block, i.e. the first block we'll process after the snapshot
479+
// height+2: next block, i.e. the second block after the snapshot
480+
//
481+
// We need to fetch the NextValidators from height+2 because if the application changed
482+
// the validator set at the snapshot height then this only takes effect at height+2.
483+
h := int64(height)
484+
lastMeta := blockStore.LoadBlockMeta(h)
485+
if lastMeta == nil {
486+
return cmtState.State{}, fmt.Errorf("block meta not found at height %d", h)
487+
}
488+
currentMeta := blockStore.LoadBlockMeta(h + 1)
489+
if currentMeta == nil {
490+
return cmtState.State{}, fmt.Errorf("block meta not found at height %d", h+1)
491+
}
492+
nextMeta := blockStore.LoadBlockMeta(h + 2)
493+
if nextMeta == nil {
494+
return cmtState.State{}, fmt.Errorf("block meta not found at height %d", h+2)
495+
}
496+
497+
lastVals, err := stateStore.LoadValidators(h)
498+
if err != nil {
499+
return cmtState.State{}, err
500+
}
501+
currentVals, err := stateStore.LoadValidators(h + 1)
502+
if err != nil {
503+
return cmtState.State{}, err
504+
}
505+
nextVals, err := stateStore.LoadValidators(h + 2)
506+
if err != nil {
507+
return cmtState.State{}, err
508+
}
509+
510+
consensusParams, err := stateStore.LoadConsensusParams(h + 1)
511+
if err != nil {
512+
return cmtState.State{}, err
513+
}
514+
515+
storeState, err := stateStore.Load()
516+
if err != nil {
517+
return cmtState.State{}, err
518+
}
519+
if storeState.IsEmpty() {
520+
return cmtState.State{}, fmt.Errorf("state store is empty")
521+
}
522+
523+
state := cmtState.State{
524+
ChainID: storeState.ChainID,
525+
Version: cmtProtoState.Version{
526+
Consensus: currentMeta.Header.Version,
527+
Software: version.TMCoreSemVer,
528+
},
529+
InitialHeight: storeState.InitialHeight,
530+
}
531+
if state.InitialHeight == 0 {
532+
state.InitialHeight = 1
533+
}
534+
535+
state.LastBlockHeight = lastMeta.Header.Height
536+
state.LastBlockTime = lastMeta.Header.Time
537+
state.LastBlockID = lastMeta.BlockID
538+
state.AppHash = currentMeta.Header.AppHash
539+
state.LastResultsHash = currentMeta.Header.LastResultsHash
540+
state.LastValidators = lastVals
541+
state.Validators = currentVals
542+
state.NextValidators = nextVals
543+
state.LastHeightValidatorsChanged = nextMeta.Header.Height
544+
state.ConsensusParams = consensusParams
545+
state.LastHeightConsensusParamsChanged = currentMeta.Header.Height
546+
547+
return state, nil
548+
}

go/oasis-node/cmd/storage/dbs.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"fmt"
55
"path/filepath"
66

7+
cometbftDB "github.com/cometbft/cometbft-db"
78
cmtCfg "github.com/cometbft/cometbft/config"
89
"github.com/cometbft/cometbft/state"
910
"github.com/cometbft/cometbft/store"
@@ -61,15 +62,19 @@ func openConsensusBlockstore(dataDir string) (*store.BlockStore, error) {
6162
return blockstore, nil
6263
}
6364

64-
func openConsensusStatestore(dataDir string) (state.Store, error) {
65+
func openConsensusStateDB(dataDir string) (cometbftDB.DB, error) {
6566
cmtConfig := cmtCfg.DefaultConfig()
6667
cmtConfig.SetRoot(filepath.Join(dataDir, cmtCommon.StateDir))
6768

6869
dbProvider, err := cmtDB.Provider()
6970
if err != nil {
7071
return nil, fmt.Errorf("failed to obtain db provider: %w", err)
7172
}
72-
stateDB, err := cmtDB.OpenStateDB(dbProvider, cmtConfig)
73+
return cmtDB.OpenStateDB(dbProvider, cmtConfig)
74+
}
75+
76+
func openConsensusStatestore(dataDir string) (state.Store, error) {
77+
stateDB, err := openConsensusStateDB(dataDir)
7378
if err != nil {
7479
return nil, fmt.Errorf("failed to open state db: %w", err)
7580
}

0 commit comments

Comments
 (0)