-
Notifications
You must be signed in to change notification settings - Fork 406
Experiment: Improve concurrent merge performance by weakly owning branch updates #8268
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
18 commits
Select commit
Hold shift + click to select a range
4c2766d
Add "WeakOwner", a KV-based weak ownership mechanism
arielshaqed 1ef841a
Obtain weak ownership of branch on all `BranchUpdate` operations
arielshaqed c17c472
Better default parameters for branch ownership
arielshaqed 1942157
Add lakefs abuse merge command
arielshaqed 1b7b740
Make branch weak ownership configurable
arielshaqed bd63ee4
Straighten out interval handling and fix checks-validator
arielshaqed c6b412c
[CR] Bug: Ensure single owner succeeds the first time a key is owned
arielshaqed 7624545
Remove log print from test - confusing in a codebase
arielshaqed 5566132
[CR] Fix comments, error phrasing, and command descriptions
arielshaqed c94c7a7
[CR] Clarify request ID handling when missing, rename own -> release
arielshaqed eb84829
[CR] Remove finished sentinel and break ownership update loop on error
arielshaqed 305c433
[CR] Run Esti test with branch ownership
arielshaqed c7ecd5f
Add log line to indicate ref manager started with weak ownership
arielshaqed 0ec602d
[CR] Only reset if owned when cancelling weak ownership
arielshaqed 007e04a
Add totals line to lakectl abuse statistics
arielshaqed 3cc70c9
lakectl abuse merge: clean up branches before exiting
arielshaqed b198397
[CR] Correctly count KV ops in comments, and some minor cleanups
arielshaqed d352071
Rename basic "ownership" class and move it to pkg/distributed/
arielshaqed File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,4 +5,3 @@ plugins: | |
| out: pkg | ||
| opt: | ||
| - paths=source_relative | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,197 @@ | ||
| package cmd | ||
|
|
||
| import ( | ||
| "context" | ||
| "fmt" | ||
| "net/http" | ||
| "os" | ||
| "sync" | ||
| "syscall" | ||
| "time" | ||
|
|
||
| nanoid "github.com/matoous/go-nanoid/v2" | ||
| "github.com/spf13/cobra" | ||
| "github.com/treeverse/lakefs/pkg/api/apigen" | ||
| "github.com/treeverse/lakefs/pkg/api/apiutil" | ||
| "github.com/treeverse/lakefs/pkg/api/helpers" | ||
| "github.com/treeverse/lakefs/pkg/testutil/stress" | ||
| "github.com/treeverse/lakefs/pkg/uri" | ||
| ) | ||
|
|
||
| // removeBranches removes all branches whose names start with prefix, in | ||
| // parallel. It reports (all) failures but does not fail. | ||
| func removeBranches(ctx context.Context, client *apigen.ClientWithResponses, parallelism int, repo, prefix string) { | ||
| toDelete := make(chan string) | ||
| pfx := apigen.PaginationPrefix(prefix) | ||
| after := apigen.PaginationAfter("") | ||
| go func() { | ||
| defer close(toDelete) | ||
| for { | ||
| resp, err := client.ListBranchesWithResponse(ctx, repo, &apigen.ListBranchesParams{ | ||
| Prefix: &pfx, | ||
| After: &after, | ||
| }) | ||
| if err != nil { | ||
| fmt.Printf("Failed to request to list branches %s/%s after %s: %s\n", repo, pfx, after, err) | ||
| } | ||
| if resp.JSON200 == nil { | ||
| fmt.Printf("Failed to list branches %s/%s after %s: %s\n", repo, pfx, after, resp.Status()) | ||
| break | ||
| } | ||
| for _, result := range resp.JSON200.Results { | ||
| toDelete <- result.Id | ||
| } | ||
| if !resp.JSON200.Pagination.HasMore { | ||
| break | ||
| } | ||
| after = apigen.PaginationAfter(resp.JSON200.Pagination.NextOffset) | ||
| } | ||
| }() | ||
|
|
||
| wg := &sync.WaitGroup{} | ||
| wg.Add(parallelism) | ||
| for i := 0; i < parallelism; i++ { | ||
| go func() { | ||
| for branch := range toDelete { | ||
| resp, err := client.DeleteBranchWithResponse(ctx, repo, branch, &apigen.DeleteBranchParams{}) | ||
| if err != nil { | ||
| fmt.Printf("Failed to request %s deletion: %s\n", branch, err) | ||
| continue | ||
| } | ||
| if resp.StatusCode() != http.StatusNoContent { | ||
| fmt.Printf("Failed to delete %s: %s\n", branch, resp.Status()) | ||
| continue | ||
| } | ||
| } | ||
| wg.Done() | ||
| }() | ||
| } | ||
| wg.Wait() | ||
| } | ||
|
|
||
| var abuseMergeCmd = &cobra.Command{ | ||
| Use: "merge <branch URI>", | ||
| Short: "Merge non-conflicting objects to the source branch in parallel", | ||
| Hidden: false, | ||
| Args: cobra.ExactArgs(1), | ||
| ValidArgsFunction: ValidArgsRepository, | ||
| Run: func(cmd *cobra.Command, args []string) { | ||
| u := MustParseBranchURI("branch URI", args[0]) | ||
| amount := Must(cmd.Flags().GetInt("amount")) | ||
| parallelism := Must(cmd.Flags().GetInt("parallelism")) | ||
|
|
||
| fmt.Println("Source branch: ", u) | ||
|
|
||
| branchPrefix := "merge-" + nanoid.Must() | ||
| fmt.Println("Branch prefix: ", branchPrefix) | ||
|
|
||
| generator := stress.NewGenerator("merge", parallelism, stress.WithSignalHandlersFor(os.Interrupt, syscall.SIGTERM)) | ||
|
|
||
| client := getClient() | ||
|
|
||
| // generate branch names as input | ||
| generator.Setup(func(add stress.GeneratorAddFn) { | ||
| for i := 0; i < amount; i++ { | ||
| add(fmt.Sprintf("%s-%04d", branchPrefix, i+1)) | ||
| } | ||
| }) | ||
|
|
||
| defer removeBranches(cmd.Context(), client, parallelism, u.Repository, branchPrefix) | ||
|
|
||
| resp, err := client.GetRepositoryWithResponse(cmd.Context(), u.Repository) | ||
| DieOnErrorOrUnexpectedStatusCode(resp, err, http.StatusOK) | ||
| if resp.JSON200 == nil { | ||
| DieFmt("Bad response from server: %+v", resp) | ||
| } | ||
|
|
||
| ctx := cmd.Context() | ||
|
|
||
| // execute ALL the things! | ||
| generator.Run(func(input chan string, output chan stress.Result) { | ||
| client := getClient() | ||
| for work := range input { | ||
| start := time.Now() | ||
| err := mergeSomething(ctx, client, u, work) | ||
| output <- stress.Result{ | ||
| Error: err, | ||
| Took: time.Since(start), | ||
| } | ||
| // Don't block or sleep to maximise parallel load. | ||
| } | ||
| }) | ||
| }, | ||
| } | ||
|
|
||
| func mergeSomething(ctx context.Context, client *apigen.ClientWithResponses, base *uri.URI, name string) error { | ||
| createBranchResponse, err := client.CreateBranchWithResponse(ctx, base.Repository, | ||
| apigen.CreateBranchJSONRequestBody{ | ||
| Name: name, | ||
| Source: base.Ref, | ||
| }, | ||
| ) | ||
| if err != nil || !apiutil.IsStatusCodeOK(createBranchResponse.StatusCode()) { | ||
| if err == nil { | ||
| err = helpers.ResponseAsError(createBranchResponse) | ||
| } | ||
| return fmt.Errorf("create branch %s: %w", name, err) | ||
| } | ||
|
|
||
| u := base.WithRef(name) | ||
| // Use a different name on each branch, to avoid conflicts. | ||
| path := fmt.Sprintf("object-%s", name) | ||
| u.Path = &path | ||
|
|
||
| getResponse, err := client.GetPhysicalAddressWithResponse(ctx, u.Repository, u.Ref, &apigen.GetPhysicalAddressParams{Path: *u.Path}) | ||
| if err != nil || getResponse.JSON200 == nil { | ||
| if err == nil { | ||
| err = helpers.ResponseAsError(getResponse) | ||
| } | ||
| return fmt.Errorf("get physical address for %s: %w", name, err) | ||
| } | ||
| // Link the object but do not actually upload anything - it is not | ||
| // important for merging, and would only reduce load. | ||
| stagingLocation := getResponse.JSON200 | ||
| linkResponse, err := client.LinkPhysicalAddressWithResponse(ctx, u.Repository, u.Ref, | ||
| &apigen.LinkPhysicalAddressParams{ | ||
| Path: *u.Path, | ||
| }, | ||
| apigen.LinkPhysicalAddressJSONRequestBody{ | ||
| Checksum: "deadbeef0000cafe", | ||
| Staging: apigen.StagingLocation{ | ||
| PhysicalAddress: stagingLocation.PhysicalAddress, | ||
| }, | ||
| UserMetadata: nil, | ||
| }) | ||
| if err != nil || linkResponse.JSON200 == nil { | ||
| if err == nil { | ||
| err = helpers.ResponseAsError(linkResponse) | ||
| } | ||
| return fmt.Errorf("link physical address for %s: %w", name, err) | ||
| } | ||
|
|
||
| commitResponse, err := client.CommitWithResponse(ctx, u.Repository, u.Ref, &apigen.CommitParams{}, apigen.CommitJSONRequestBody{Message: fmt.Sprintf("commit %s", name)}) | ||
| if err != nil || commitResponse.JSON201 == nil { | ||
| if err == nil { | ||
| err = helpers.ResponseAsError(commitResponse) | ||
| } | ||
| return fmt.Errorf("commit for %s: %w", name, err) | ||
| } | ||
|
|
||
| mergeResponse, err := client.MergeIntoBranchWithResponse(ctx, u.Repository, u.Ref, base.Ref, apigen.MergeIntoBranchJSONRequestBody{}) | ||
| if err != nil || mergeResponse.JSON200 == nil { | ||
| if err == nil { | ||
| err = helpers.ResponseAsError(mergeResponse) | ||
| } | ||
| return fmt.Errorf("merge from %s: %w", name, err) | ||
| } | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| //nolint:gochecknoinits,mnd | ||
| func init() { | ||
| abuseMergeCmd.Flags().Int("amount", 1000, "amount of merges to perform") | ||
| abuseMergeCmd.Flags().Int("parallelism", abuseDefaultParallelism, "number of merges to perform in parallel") | ||
|
|
||
| abuseCmd.AddCommand(abuseMergeCmd) | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -147,6 +147,26 @@ func setDefaults(cfgType string) { | |
| // 3ms of delay with ~300 requests/second per resource sounds like a reasonable tradeoff. | ||
| viper.SetDefault("graveler.max_batch_delay", 3*time.Millisecond) | ||
|
|
||
| viper.SetDefault("graveler.branch_ownership.enabled", false) | ||
| // ... but if branch ownership is enabled, set up some useful defaults! | ||
|
|
||
| // The single concurrent branch updater has these requirements from | ||
| // KV with these settings: | ||
| // | ||
| // - Cleanly acquiring ownership performs 1 read operation and 1 | ||
| // write operation. Releasing ownership performs another 1 read | ||
| // operation and 1 write operation. | ||
| // | ||
| // - While ownership is held, add 2.5 read and 2.5 write operation | ||
| // per second, an additional ~7 read operations per second per | ||
| // branch operation waiting to acquire ownership, and an | ||
| // additional write operation per branch operation acquiring | ||
| // ownership. | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we added some one-time costs with
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yup, added those! |
||
| // See additional comments on MostlyCorrectOwner for how to compute these numbers. | ||
| viper.SetDefault("graveler.branch_ownership.refresh", 400*time.Millisecond) | ||
| viper.SetDefault("graveler.branch_ownership.acquire", 150*time.Millisecond) | ||
|
|
||
| viper.SetDefault("ugc.prepare_interval", time.Minute) | ||
| viper.SetDefault("ugc.prepare_max_file_size", 20*1024*1024) | ||
|
|
||
|
|
||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.