diff --git a/CHANGELOG.md b/CHANGELOG.md index c5fbb545706a..04c59a6667b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -77,6 +77,7 @@ Ref: https://keepachangelog.com/en/1.0.0/ * (server) [#25632](https://github.com/cosmos/cosmos-sdk/pull/25632) Add missing call to close the app on shutdown. * (server) [#25740](https://github.com/cosmos/cosmos-sdk/pull/25740) Add variadic `grpc.DialOption` parameter to `StartGrpcServer` for custom gRPC client connection options. * (blockstm) [#25765](https://github.com/cosmos/cosmos-sdk/pull/25765) Minor code readability improvement in block-stm. +* (blockstm) [#25767](https://github.com/cosmos/cosmos-sdk/pull/25767) Optimize block-stm memory allocation using bitmap and sync.Map. ### Bug Fixes diff --git a/blockstm/mvdata.go b/blockstm/mvdata.go index 0f4395542fab..b466ee5d66f6 100644 --- a/blockstm/mvdata.go +++ b/blockstm/mvdata.go @@ -42,30 +42,32 @@ func NewGMVData[V any](isZero func(V) bool, valueLen func(V) int) *GMVData[V] { } } -// getTree returns `nil` if not found -func (d *GMVData[V]) getTree(key Key) *tree.BTree[secondaryDataItem[V]] { +// getStore returns `nil` if not found +func (d *GMVData[V]) getStore(key Key) *SecondaryStore[V] { outer, _ := d.Get(dataItem[V]{Key: key}) - return outer.Tree + return outer.Store } -// getTreeOrDefault set a new tree atomically if not found. -func (d *GMVData[V]) getTreeOrDefault(key Key) *tree.BTree[secondaryDataItem[V]] { - return d.GetOrDefault(dataItem[V]{Key: key}, (*dataItem[V]).Init).Tree +// getStoreOrDefault set a new tree atomically if not found. +func (d *GMVData[V]) getStoreOrDefault(key Key) *SecondaryStore[V] { + return d.GetOrDefault(dataItem[V]{Key: key}, (*dataItem[V]).Init).Store } func (d *GMVData[V]) Write(key Key, value V, version TxnVersion) { - tree := d.getTreeOrDefault(key) - tree.Set(secondaryDataItem[V]{Index: version.Index, Incarnation: version.Incarnation, Value: value}) + tree := d.getStoreOrDefault(key) + tree.Set(version.Index, secondaryDataItem[V]{Incarnation: version.Incarnation, Value: value}) } func (d *GMVData[V]) WriteEstimate(key Key, txn TxnIndex) { - tree := d.getTreeOrDefault(key) - tree.Set(secondaryDataItem[V]{Index: txn, Estimate: true}) + tree := d.getStoreOrDefault(key) + tree.Set(txn, secondaryDataItem[V]{Estimate: true}) } func (d *GMVData[V]) Delete(key Key, txn TxnIndex) { - tree := d.getTreeOrDefault(key) - tree.Delete(secondaryDataItem[V]{Index: txn}) + tree := d.getStore(key) + if tree != nil { + tree.Delete(txn) + } } // Read returns the value and the version of the value that's less than the given txn. @@ -78,18 +80,18 @@ func (d *GMVData[V]) Read(key Key, txn TxnIndex) (V, TxnVersion, bool) { return zero, InvalidTxnVersion, false } - tree := d.getTree(key) - if tree == nil { + store := d.getStore(key) + if store == nil { return zero, InvalidTxnVersion, false } // find the closest txn that's less than the given txn - item, ok := seekClosestTxn(tree, txn) + idx, item, ok := store.PreviousValue(txn) if !ok { return zero, InvalidTxnVersion, false } - return item.Value, item.Version(), item.Estimate + return item.Value, TxnVersion{idx, item.Incarnation}, item.Estimate } func (d *GMVData[V]) Iterator( @@ -168,12 +170,13 @@ func (d *GMVData[V]) Snapshot() (snapshot []GKVPair[V]) { func (d *GMVData[V]) SnapshotTo(cb func(Key, V) bool) { d.Scan(func(outer dataItem[V]) bool { - item, ok := outer.Tree.Max() + item, ok := outer.Store.Max() if !ok { return true } if item.Estimate { + // should not happen, just to keep it complete return true } @@ -200,13 +203,13 @@ type GKVPair[V any] struct { type KVPair = GKVPair[[]byte] type dataItem[V any] struct { - Key Key - Tree *tree.BTree[secondaryDataItem[V]] + Key Key + Store *SecondaryStore[V] } func (d *dataItem[V]) Init() { - if d.Tree == nil { - d.Tree = tree.NewBTree(secondaryLesser[V], InnerBTreeDegree) + if d.Store == nil { + d.Store = NewSecondaryStore[V]() } } @@ -215,23 +218,3 @@ var _ tree.KeyItem = dataItem[[]byte]{} func (item dataItem[V]) GetKey() []byte { return item.Key } - -type secondaryDataItem[V any] struct { - Index TxnIndex - Incarnation Incarnation - Value V - Estimate bool -} - -func secondaryLesser[V any](a, b secondaryDataItem[V]) bool { - return a.Index < b.Index -} - -func (item secondaryDataItem[V]) Version() TxnVersion { - return TxnVersion{Index: item.Index, Incarnation: item.Incarnation} -} - -// seekClosestTxn returns the closest txn that's less than the given txn. -func seekClosestTxn[V any](tree *tree.BTree[secondaryDataItem[V]], txn TxnIndex) (secondaryDataItem[V], bool) { - return tree.ReverseSeek(secondaryDataItem[V]{Index: txn - 1}) -} diff --git a/blockstm/mviterator.go b/blockstm/mviterator.go index ce2f2fdfcc37..56de941d4a6f 100644 --- a/blockstm/mviterator.go +++ b/blockstm/mviterator.go @@ -75,7 +75,7 @@ func (it *MVIterator[V]) ReadEstimateValue() bool { func (it *MVIterator[V]) resolveValue() { inner := &it.BTreeIteratorG for ; inner.Valid(); inner.Next() { - v, ok := it.resolveValueInner(inner.Item().Tree) + idx, v, ok := it.resolveValueInner(inner.Item().Store) if !ok { // abort the iterator it.Invalidate() @@ -84,11 +84,12 @@ func (it *MVIterator[V]) resolveValue() { return } if v == nil { + // value not found continue } it.value = v.Value - it.version = v.Version() + it.version = TxnVersion{idx, v.Incarnation} if it.Executing() { it.reads = append(it.reads, ReadDescriptor{ Key: inner.Item().Key, @@ -102,25 +103,25 @@ func (it *MVIterator[V]) resolveValue() { // resolveValueInner loop until we find a value that is not an estimate, // wait for dependency if gets an ESTIMATE. // returns: -// - (nil, true) if the value is not found -// - (nil, false) if the value is an estimate and we should fail the validation -// - (v, true) if the value is found -func (it *MVIterator[V]) resolveValueInner(tree *tree.BTree[secondaryDataItem[V]]) (*secondaryDataItem[V], bool) { +// - (0, nil, true) if the value is not found +// - (0, nil, false) if the value is an estimate and we should fail the validation +// - (idx, v, true) if the value is found at idx +func (it *MVIterator[V]) resolveValueInner(tree *SecondaryStore[V]) (TxnIndex, *secondaryDataItem[V], bool) { for { - v, ok := seekClosestTxn(tree, it.txn) + idx, v, ok := tree.PreviousValue(it.txn) if !ok { - return nil, true + return 0, nil, true } if v.Estimate { if it.Executing() { - it.waitFn(v.Index) + it.waitFn(idx) continue } // in validation mode, it should fail validation immediately - return nil, false + return 0, nil, false } - return &v, true + return idx, &v, true } } diff --git a/blockstm/secondary_store.go b/blockstm/secondary_store.go new file mode 100644 index 000000000000..93805b4822a3 --- /dev/null +++ b/blockstm/secondary_store.go @@ -0,0 +1,79 @@ +package blockstm + +import ( + "sync" + + "github.com/RoaringBitmap/roaring/v2" +) + +type secondaryDataItem[V any] struct { + Incarnation Incarnation + Value V + Estimate bool +} + +type SecondaryStore[V any] struct { + // mu only protects bitmap, we don't sychonize between bitmap and data, + // the reader can observe a version in bitmap but not in data, which is handled at reader side. + mu sync.RWMutex + bitmap roaring.Bitmap + + data sync.Map +} + +func NewSecondaryStore[V any]() *SecondaryStore[V] { + return &SecondaryStore[V]{} +} + +func (s *SecondaryStore[V]) Set(version TxnIndex, item secondaryDataItem[V]) { + s.mu.Lock() + s.bitmap.Add(uint32(version)) + s.mu.Unlock() + + s.data.Store(version, item) +} + +func (s *SecondaryStore[V]) Delete(version TxnIndex) { + s.mu.Lock() + s.bitmap.Remove(uint32(version)) + s.mu.Unlock() + + s.data.Delete(version) +} + +// PreviousValue returns the closest version that's less than the given version, exclusive. +func (s *SecondaryStore[V]) PreviousValue(target TxnIndex) (TxnIndex, secondaryDataItem[V], bool) { + for target > 0 { + s.mu.RLock() + prev := s.bitmap.PreviousValue(uint32(target - 1)) + s.mu.RUnlock() + + if prev == -1 { + return 0, secondaryDataItem[V]{}, false + } + + target = TxnIndex(prev) + value, ok := s.data.Load(target) + if ok { + return target, value.(secondaryDataItem[V]), true + } + } + + return 0, secondaryDataItem[V]{}, false +} + +// Max is only called at block commit time, no need to synchronize. +func (s *SecondaryStore[V]) Max() (secondaryDataItem[V], bool) { + if s.bitmap.IsEmpty() { + return secondaryDataItem[V]{}, false + } + + target := TxnIndex(s.bitmap.Maximum()) + + value, ok := s.data.Load(target) + if !ok { + return secondaryDataItem[V]{}, false + } + + return value.(secondaryDataItem[V]), true +} diff --git a/blockstm/stm.go b/blockstm/stm.go index 307494b357d7..e2b7a0ccb953 100644 --- a/blockstm/stm.go +++ b/blockstm/stm.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "math" "runtime" "golang.org/x/sync/errgroup" @@ -36,6 +37,10 @@ func ExecuteBlockWithEstimates( estimates []MultiLocations, // txn -> multi-locations txExecutor TxExecutor, ) error { + if blockSize > math.MaxUint32 { + return fmt.Errorf("block size overflows uint32: %d", blockSize) + } + if executors < 0 { return fmt.Errorf("invalid number of executors: %d", executors) } diff --git a/go.mod b/go.mod index b62e0acb9d68..9b60eb07bb2f 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( cosmossdk.io/store v1.3.0-beta.0 cosmossdk.io/x/tx v0.14.0 github.com/99designs/keyring v1.2.1 + github.com/RoaringBitmap/roaring/v2 v2.14.4 github.com/bgentry/speakeasy v0.2.0 github.com/bits-and-blooms/bitset v1.24.4 github.com/chzyer/readline v1.5.1 @@ -191,6 +192,7 @@ require ( github.com/mattn/go-colorable v0.1.14 // indirect github.com/minio/highwayhash v1.0.3 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect + github.com/mschoch/smat v0.2.0 // indirect github.com/mtibben/percent v0.2.1 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/oasisprotocol/curve25519-voi v0.0.0-20230904125328-1f23a7beb09a // indirect diff --git a/go.sum b/go.sum index 90c72f8cec77..f18654f70b8b 100644 --- a/go.sum +++ b/go.sum @@ -68,6 +68,8 @@ github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERo github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEVMRuU21PR1EtLVZJmdB18Gu3Rw= github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk= +github.com/RoaringBitmap/roaring/v2 v2.14.4 h1:4aKySrrg9G/5oRtJ3TrZLObVqxgQ9f1znCRBwEwjuVw= +github.com/RoaringBitmap/roaring/v2 v2.14.4/go.mod h1:oMvV6omPWr+2ifRdeZvVJyaz+aoEUopyv5iH0u/+wbY= github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= github.com/VividCortex/gohistogram v1.0.0 h1:6+hBz+qvs0JOrrNhhmR7lFxo5sINxBCGXrdtl/UvroE= @@ -612,6 +614,8 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= +github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= github.com/mtibben/percent v0.2.1 h1:5gssi8Nqo8QU/r2pynCm+hBQHpkB/uNK7BJCFogWdzs= github.com/mtibben/percent v0.2.1/go.mod h1:KG9uO+SZkUp+VkRHsCdYQV3XSZrrSpR3O9ibNBTZrns= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=