Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
4c2766d
Add "WeakOwner", a KV-based weak ownership mechanism
arielshaqed Oct 6, 2024
1ef841a
Obtain weak ownership of branch on all `BranchUpdate` operations
arielshaqed Oct 7, 2024
c17c472
Better default parameters for branch ownership
arielshaqed Oct 7, 2024
1942157
Add lakefs abuse merge command
arielshaqed Oct 7, 2024
1b7b740
Make branch weak ownership configurable
arielshaqed Oct 7, 2024
bd63ee4
Straighten out interval handling and fix checks-validator
arielshaqed Oct 8, 2024
c6b412c
[CR] Bug: Ensure single owner succeeds the first time a key is owned
arielshaqed Oct 8, 2024
7624545
Remove log print from test - confusing in a codebase
arielshaqed Oct 8, 2024
5566132
[CR] Fix comments, error phrasing, and command descriptions
arielshaqed Oct 10, 2024
c94c7a7
[CR] Clarify request ID handling when missing, rename own -> release
arielshaqed Oct 10, 2024
eb84829
[CR] Remove finished sentinel and break ownership update loop on error
arielshaqed Oct 10, 2024
305c433
[CR] Run Esti test with branch ownership
arielshaqed Oct 10, 2024
c7ecd5f
Add log line to indicate ref manager started with weak ownership
arielshaqed Oct 10, 2024
0ec602d
[CR] Only reset if owned when cancelling weak ownership
arielshaqed Oct 14, 2024
007e04a
Add totals line to lakectl abuse statistics
arielshaqed Oct 25, 2024
3cc70c9
lakectl abuse merge: clean up branches before exiting
arielshaqed Oct 26, 2024
b198397
[CR] Correctly count KV ops in comments, and some minor cleanups
arielshaqed Oct 28, 2024
d352071
Rename basic "ownership" class and move it to pkg/distributed/
arielshaqed Oct 28, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/esti.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,9 @@ jobs:
name: Run latest lakeFS app on AWS S3
needs: [deploy-image, login-to-amazon-ecr]
runs-on: ubuntu-22.04
strategy:
matrix:
branch_ownership: [false, true]
env:
TAG: ${{ needs.deploy-image.outputs.tag }}
REPO: ${{ needs.login-to-amazon-ecr.outputs.registry }}
Expand Down Expand Up @@ -866,6 +869,7 @@ jobs:
LAKEFS_BLOCKSTORE_TYPE: s3
LAKEFS_BLOCKSTORE_S3_CREDENTIALS_ACCESS_KEY_ID: ${{ secrets.ESTI_AWS_ACCESS_KEY_ID }}
LAKEFS_BLOCKSTORE_S3_CREDENTIALS_SECRET_ACCESS_KEY: ${{ secrets.ESTI_AWS_SECRET_ACCESS_KEY }}
LAKEFS_GRAVELER_BRANCH_OWNERSHIP_ENABLED: ${{ matrix.branch_ownership }}
LAKEFS_DATABASE_TYPE: postgres
DOCKER_REG: ${{ needs.login-to-amazon-ecr.outputs.registry }}
ESTI_BLOCKSTORE_TYPE: s3
Expand Down
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ gen-code: gen-api ## Run the generator for inline commands
./pkg/actions \
./pkg/auth/ \
./pkg/authentication \
./pkg/distributed \
./pkg/graveler \
./pkg/graveler/committed \
./pkg/graveler/sstable \
Expand Down
1 change: 0 additions & 1 deletion buf.gen.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,3 @@ plugins:
out: pkg
opt:
- paths=source_relative

197 changes: 197 additions & 0 deletions cmd/lakectl/cmd/abuse_merge.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
package cmd

import (
"context"
"fmt"
"net/http"
"os"
"sync"
"syscall"
"time"

nanoid "github.com/matoous/go-nanoid/v2"
"github.com/spf13/cobra"
"github.com/treeverse/lakefs/pkg/api/apigen"
"github.com/treeverse/lakefs/pkg/api/apiutil"
"github.com/treeverse/lakefs/pkg/api/helpers"
"github.com/treeverse/lakefs/pkg/testutil/stress"
"github.com/treeverse/lakefs/pkg/uri"
)

// removeBranches removes all branches whose names start with prefix, in
// parallel. It reports (all) failures to stdout but does not fail: cleanup
// is strictly best-effort.
func removeBranches(ctx context.Context, client *apigen.ClientWithResponses, parallelism int, repo, prefix string) {
	toDelete := make(chan string)
	pfx := apigen.PaginationPrefix(prefix)
	after := apigen.PaginationAfter("")
	// Producer: page through all matching branches and feed their names
	// to the deletion workers.
	go func() {
		defer close(toDelete)
		for {
			resp, err := client.ListBranchesWithResponse(ctx, repo, &apigen.ListBranchesParams{
				Prefix: &pfx,
				After:  &after,
			})
			if err != nil {
				fmt.Printf("Failed to request to list branches %s/%s after %s: %s\n", repo, pfx, after, err)
				// BUG FIX: resp may be nil when err != nil, so falling
				// through to resp.JSON200 below would panic. Stop listing.
				break
			}
			if resp.JSON200 == nil {
				fmt.Printf("Failed to list branches %s/%s after %s: %s\n", repo, pfx, after, resp.Status())
				break
			}
			for _, result := range resp.JSON200.Results {
				toDelete <- result.Id
			}
			if !resp.JSON200.Pagination.HasMore {
				break
			}
			after = apigen.PaginationAfter(resp.JSON200.Pagination.NextOffset)
		}
	}()

	// Consumers: delete branches concurrently until the producer closes
	// the channel.
	wg := &sync.WaitGroup{}
	wg.Add(parallelism)
	for i := 0; i < parallelism; i++ {
		go func() {
			defer wg.Done()
			for branch := range toDelete {
				resp, err := client.DeleteBranchWithResponse(ctx, repo, branch, &apigen.DeleteBranchParams{})
				if err != nil {
					fmt.Printf("Failed to request %s deletion: %s\n", branch, err)
					continue
				}
				if resp.StatusCode() != http.StatusNoContent {
					fmt.Printf("Failed to delete %s: %s\n", branch, resp.Status())
					continue
				}
			}
		}()
	}
	wg.Wait()
}

// abuseMergeCmd stress-tests merging: it creates many branches off the
// given source branch in parallel, commits one object to each, and merges
// them all back, then cleans the branches up on exit.
var abuseMergeCmd = &cobra.Command{
	Use:               "merge <branch URI>",
	Short:             "Merge non-conflicting objects to the source branch in parallel",
	Hidden:            false,
	Args:              cobra.ExactArgs(1),
	ValidArgsFunction: ValidArgsRepository,
	Run: func(cmd *cobra.Command, args []string) {
		sourceBranch := MustParseBranchURI("branch URI", args[0])
		amount := Must(cmd.Flags().GetInt("amount"))
		parallelism := Must(cmd.Flags().GetInt("parallelism"))

		fmt.Println("Source branch: ", sourceBranch)

		branchPrefix := "merge-" + nanoid.Must()
		fmt.Println("Branch prefix: ", branchPrefix)

		generator := stress.NewGenerator("merge", parallelism, stress.WithSignalHandlersFor(os.Interrupt, syscall.SIGTERM))

		client := getClient()

		// Feed the generator one unique branch name per planned merge.
		generator.Setup(func(add stress.GeneratorAddFn) {
			for i := 0; i < amount; i++ {
				add(fmt.Sprintf("%s-%04d", branchPrefix, i+1))
			}
		})

		// Best-effort cleanup of every branch created under branchPrefix.
		defer removeBranches(cmd.Context(), client, parallelism, sourceBranch.Repository, branchPrefix)

		resp, err := client.GetRepositoryWithResponse(cmd.Context(), sourceBranch.Repository)
		DieOnErrorOrUnexpectedStatusCode(resp, err, http.StatusOK)
		if resp.JSON200 == nil {
			DieFmt("Bad response from server: %+v", resp)
		}

		ctx := cmd.Context()

		// Run all the merges, each worker using its own client.
		generator.Run(func(input chan string, output chan stress.Result) {
			client := getClient()
			for work := range input {
				start := time.Now()
				err := mergeSomething(ctx, client, sourceBranch, work)
				output <- stress.Result{
					Error: err,
					Took:  time.Since(start),
				}
				// Deliberately no pause here: the goal is maximal load.
			}
		})
	},
}

// mergeSomething creates branch name off base, links a single object on it
// (without uploading any data), commits, and merges the branch back into
// base. It returns an error wrapping the first step that failed.
func mergeSomething(ctx context.Context, client *apigen.ClientWithResponses, base *uri.URI, name string) error {
	createResp, err := client.CreateBranchWithResponse(ctx, base.Repository,
		apigen.CreateBranchJSONRequestBody{
			Name:   name,
			Source: base.Ref,
		},
	)
	if err == nil && !apiutil.IsStatusCodeOK(createResp.StatusCode()) {
		err = helpers.ResponseAsError(createResp)
	}
	if err != nil {
		return fmt.Errorf("create branch %s: %w", name, err)
	}

	branchURI := base.WithRef(name)
	// Every branch writes a distinctly-named object, so merges never conflict.
	objectPath := fmt.Sprintf("object-%s", name)
	branchURI.Path = &objectPath

	physResp, err := client.GetPhysicalAddressWithResponse(ctx, branchURI.Repository, branchURI.Ref, &apigen.GetPhysicalAddressParams{Path: *branchURI.Path})
	if err == nil && physResp.JSON200 == nil {
		err = helpers.ResponseAsError(physResp)
	}
	if err != nil {
		return fmt.Errorf("get physical address for %s: %w", name, err)
	}

	// Link the object but do not actually upload anything - it is not
	// important for merging, and would only reduce load.
	linkResp, err := client.LinkPhysicalAddressWithResponse(ctx, branchURI.Repository, branchURI.Ref,
		&apigen.LinkPhysicalAddressParams{
			Path: *branchURI.Path,
		},
		apigen.LinkPhysicalAddressJSONRequestBody{
			Checksum: "deadbeef0000cafe",
			Staging: apigen.StagingLocation{
				PhysicalAddress: physResp.JSON200.PhysicalAddress,
			},
			UserMetadata: nil,
		})
	if err == nil && linkResp.JSON200 == nil {
		err = helpers.ResponseAsError(linkResp)
	}
	if err != nil {
		return fmt.Errorf("link physical address for %s: %w", name, err)
	}

	commitResp, err := client.CommitWithResponse(ctx, branchURI.Repository, branchURI.Ref, &apigen.CommitParams{}, apigen.CommitJSONRequestBody{Message: fmt.Sprintf("commit %s", name)})
	if err == nil && commitResp.JSON201 == nil {
		err = helpers.ResponseAsError(commitResp)
	}
	if err != nil {
		return fmt.Errorf("commit for %s: %w", name, err)
	}

	mergeResp, err := client.MergeIntoBranchWithResponse(ctx, branchURI.Repository, branchURI.Ref, base.Ref, apigen.MergeIntoBranchJSONRequestBody{})
	if err == nil && mergeResp.JSON200 == nil {
		err = helpers.ResponseAsError(mergeResp)
	}
	if err != nil {
		return fmt.Errorf("merge from %s: %w", name, err)
	}

	return nil
}

// Register the merge subcommand and its flags under `lakectl abuse`.
//nolint:gochecknoinits,mnd
func init() {
	flags := abuseMergeCmd.Flags()
	flags.Int("amount", 1000, "amount of merges to perform")
	flags.Int("parallelism", abuseDefaultParallelism, "number of merges to perform in parallel")

	abuseCmd.AddCommand(abuseMergeCmd)
}
19 changes: 19 additions & 0 deletions docs/reference/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -3031,6 +3031,25 @@ lakectl abuse list <source ref URI> [flags]



### lakectl abuse merge

Merge non-conflicting objects to the source branch in parallel

```
lakectl abuse merge <branch URI> [flags]
```

#### Options
{:.no_toc}

```
--amount int amount of merges to perform (default 1000)
-h, --help help for merge
--parallelism int number of merges to perform in parallel (default 100)
```



### lakectl abuse random-delete

Delete keys from a file and generate random delete from the source ref for those keys.
Expand Down
26 changes: 19 additions & 7 deletions pkg/catalog/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,17 @@ func (c *ctxCloser) Close() error {
return nil
}

// makeBranchApproximateOwnershipParams translates branch-ownership
// configuration into ref-manager parameters. When ownership is disabled it
// returns the zero value: zero Durations mean no branch ownership.
func makeBranchApproximateOwnershipParams(cfg config.ApproximatelyCorrectOwnership) ref.BranchApproximateOwnershipParams {
	var params ref.BranchApproximateOwnershipParams
	if cfg.Enabled {
		params.AcquireInterval = cfg.Acquire
		params.RefreshInterval = cfg.Refresh
	}
	return params
}

func New(ctx context.Context, cfg Config) (*Catalog, error) {
ctx, cancelFn := context.WithCancel(ctx)
adapter, err := factory.BuildBlockAdapter(ctx, nil, cfg.Config)
Expand Down Expand Up @@ -364,13 +375,14 @@ func New(ctx context.Context, cfg Config) (*Catalog, error) {
addressProvider := ident.NewHexAddressProvider()
refManager := ref.NewRefManager(
ref.ManagerConfig{
Executor: executor,
KVStore: cfg.KVStore,
KVStoreLimited: storeLimiter,
AddressProvider: addressProvider,
RepositoryCacheConfig: ref.CacheConfig(cfg.Config.Graveler.RepositoryCache),
CommitCacheConfig: ref.CacheConfig(cfg.Config.Graveler.CommitCache),
MaxBatchDelay: cfg.Config.Graveler.MaxBatchDelay,
Executor: executor,
KVStore: cfg.KVStore,
KVStoreLimited: storeLimiter,
AddressProvider: addressProvider,
RepositoryCacheConfig: ref.CacheConfig(cfg.Config.Graveler.RepositoryCache),
CommitCacheConfig: ref.CacheConfig(cfg.Config.Graveler.CommitCache),
MaxBatchDelay: cfg.Config.Graveler.MaxBatchDelay,
BranchApproximateOwnershipParams: makeBranchApproximateOwnershipParams(cfg.Config.Graveler.BranchOwnership),
})
gcManager := retention.NewGarbageCollectionManager(tierFSParams.Adapter, refManager, cfg.Config.Committed.BlockStoragePrefix)
settingManager := settings.NewManager(refManager, cfg.KVStore)
Expand Down
14 changes: 14 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,13 @@ type Database struct {
} `mapstructure:"cosmosdb"`
}

// ApproximatelyCorrectOwnership configures an approximate ("mostly
// correct") ownership mechanism. Such ownership may safely fail; it tunes
// performance of concurrent branch updates, not correctness.
type ApproximatelyCorrectOwnership struct {
	// Enabled turns approximate ownership on.
	Enabled bool `mapstructure:"enabled"`
	// Refresh is the interval at which held ownership is refreshed.
	Refresh time.Duration `mapstructure:"refresh"`
	// Acquire is the interval used when trying to acquire ownership.
	Acquire time.Duration `mapstructure:"acquire"`
}

// Config - Output struct of configuration, used to validate. If you read a key using a viper accessor
// rather than accessing a field of this struct, that key will *not* be validated. So don't
// do that.
Expand Down Expand Up @@ -330,6 +337,13 @@ type Config struct {
RateLimit int `mapstructure:"rate_limit"`
} `mapstructure:"background"`
MaxBatchDelay time.Duration `mapstructure:"max_batch_delay"`
// Parameters for tuning performance of concurrent branch
// update operations. These do not affect correctness or
// liveness. Internally this is "*most correct* branch
// ownership" because this ownership may safely fail. This
// distinction is unimportant during configuration, so use a
// shorter name.
BranchOwnership ApproximatelyCorrectOwnership `mapstructure:"branch_ownership"`
} `mapstructure:"graveler"`
Gateways struct {
S3 struct {
Expand Down
20 changes: 20 additions & 0 deletions pkg/config/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,26 @@ func setDefaults(cfgType string) {
// 3ms of delay with ~300 requests/second per resource sounds like a reasonable tradeoff.
viper.SetDefault("graveler.max_batch_delay", 3*time.Millisecond)

viper.SetDefault("graveler.branch_ownership.enabled", false)
// ... but if branch ownership is enabled, set up some useful defaults!

// The single concurrent branch updater has these requirements from
// KV with these settings:
//
// - Cleanly acquiring ownership performs 1 read operation and 1
//   write operation. Releasing ownership performs another 1 read
//   operation and 1 write operation.
//
// - While ownership is held, add 2.5 read and 2.5 write operations
//   per second, an additional ~7 read operations per second per
//   branch operation waiting to acquire ownership, and an
//   additional write operation per branch operation acquiring
//   ownership.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we added some one time costs with releaseIf

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, added those!

// See additional comments on MostlyCorrectOwner for how to compute these numbers.
viper.SetDefault("graveler.branch_ownership.refresh", 400*time.Millisecond)
viper.SetDefault("graveler.branch_ownership.acquire", 150*time.Millisecond)

viper.SetDefault("ugc.prepare_interval", time.Minute)
viper.SetDefault("ugc.prepare_max_file_size", 20*1024*1024)

Expand Down
Loading
Loading