-
Notifications
You must be signed in to change notification settings - Fork 526
[Internal] Per Partition Automatic Failover: Fixes Metadata Requests Retry Policy #4205
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
microsoft-github-policy-service
merged 20 commits into
master
from
users/kundadebdatta/4181_retry_metadata_requests_on_gateway_timeouts
Dec 30, 2023
Merged
Changes from all commits
Commits
Show all changes
20 commits
Select commit
Hold shift + click to select a range
eb7b0a0
Code changes to retry on next preferred region for metadata reads on …
kundadebdatta ec125de
Code changes to add retry for PK Ranges call.
kundadebdatta 483cc45
Code changes to mark endpoint unavailable for read when cosmos except…
kundadebdatta cc4657f
Code changes to fix unit tests. Added global endpoint manager in Pk R…
kundadebdatta dbee389
Code changes to fix unit tests.
kundadebdatta 505ee41
Code changes to fix build break.
kundadebdatta 77bc01d
Minor code clean-up.
kundadebdatta c26bbb9
Code changes to capture metadata location endpoint within on before s…
kundadebdatta 416cb6e
Code changes to address review comments.
kundadebdatta ba31430
Code changes to fix build failure.
kundadebdatta dab70d0
Code changes to refactor metadata timeout policy.
kundadebdatta 2e4cfc7
Code changes to add retry for request timeout. Fix emulator tests.
kundadebdatta 697f9be
Code changes to add metadata retry policy unit tests.
kundadebdatta bcb2222
Code changes to add more tests.
kundadebdatta 621bd64
Merge branch 'master' into users/kundadebdatta/4181_retry_metadata_re…
kundadebdatta 0204173
Code changes to refactor metadata retry policy logic to increment loc…
kundadebdatta 6724c77
Merge branch 'master' into users/kundadebdatta/4181_retry_metadata_re…
kundadebdatta b507ed2
Code changes to address review comments.
kundadebdatta 2f427e3
Code changes to address review comments.
kundadebdatta a20af65
Code changes to add separate condition for pk range requests.
kundadebdatta File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
190 changes: 190 additions & 0 deletions
190
Microsoft.Azure.Cosmos/src/MetadataRequestThrottleRetryPolicy.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,190 @@ | ||
| //------------------------------------------------------------ | ||
| // Copyright (c) Microsoft Corporation. All rights reserved. | ||
| //------------------------------------------------------------ | ||
|
|
||
| namespace Microsoft.Azure.Cosmos | ||
| { | ||
| using System; | ||
| using System.Net; | ||
| using System.Threading; | ||
| using System.Threading.Tasks; | ||
| using Microsoft.Azure.Cosmos.Core.Trace; | ||
| using Microsoft.Azure.Cosmos.Routing; | ||
| using Microsoft.Azure.Documents; | ||
|
|
||
| /// <summary> | ||
| /// Metadata Request Throttle Retry Policy is a combination of endpoint change retry and throttling retry. | ||
| /// </summary> | ||
| internal sealed class MetadataRequestThrottleRetryPolicy : IDocumentClientRetryPolicy | ||
| { | ||
| /// <summary> | ||
| /// A constant integer defining the default maximum retry wait time in seconds. | ||
| /// </summary> | ||
| private const int DefaultMaxWaitTimeInSeconds = 60; | ||
kundadebdatta marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| /// <summary> | ||
| /// A constant integer defining the default maximum retry count on service unavailable. | ||
| /// </summary> | ||
| private const int DefaultMaxServiceUnavailableRetryCount = 1; | ||
|
|
||
| /// <summary> | ||
| /// An instance of <see cref="IGlobalEndpointManager"/>. | ||
| /// </summary> | ||
| private readonly IGlobalEndpointManager globalEndpointManager; | ||
|
|
||
| /// <summary> | ||
| /// Defines the throttling retry policy that is used as the underlying retry policy. | ||
| /// </summary> | ||
| private readonly IDocumentClientRetryPolicy throttlingRetryPolicy; | ||
|
|
||
| /// <summary> | ||
| /// An integer defining the maximum retry count on service unavailable. | ||
| /// </summary> | ||
| private readonly int maxServiceUnavailableRetryCount; | ||
|
|
||
| /// <summary> | ||
| /// An instance of <see cref="MetadataRetryContext"/> holding the location index and the | ||
| /// preferred locations flag that are used to route the next metadata request. | ||
| /// </summary> | ||
| private MetadataRetryContext retryContext; | ||
|
|
||
| /// <summary> | ||
| /// An integer capturing the current retry count on service unavailable. | ||
| /// </summary> | ||
| private int serviceUnavailableRetryCount; | ||
|
|
||
| /// <summary> | ||
| /// Initializes a new instance of <see cref="MetadataRequestThrottleRetryPolicy"/>. The service unavailable retry budget is the larger of the default (1) and the preferred location count, and routing starts at location index 0 on the preferred locations. | ||
| /// </summary> | ||
| /// <param name="endpointManager">An instance of <see cref="IGlobalEndpointManager"/>; it is dereferenced here to read the preferred location count, so it must not be null.</param> | ||
| /// <param name="maxRetryAttemptsOnThrottledRequests">An integer defining the maximum number | ||
| /// of attempts to retry when requests are throttled; handed to the underlying throttling retry policy.</param> | ||
| /// <param name="maxRetryWaitTimeInSeconds">An integer defining the maximum wait time in seconds; handed to the underlying throttling retry policy.</param> | ||
| public MetadataRequestThrottleRetryPolicy( | ||
kundadebdatta marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| IGlobalEndpointManager endpointManager, | ||
| int maxRetryAttemptsOnThrottledRequests, | ||
| int maxRetryWaitTimeInSeconds = DefaultMaxWaitTimeInSeconds) | ||
| { | ||
| this.globalEndpointManager = endpointManager; | ||
| this.maxServiceUnavailableRetryCount = Math.Max( | ||
| MetadataRequestThrottleRetryPolicy.DefaultMaxServiceUnavailableRetryCount, | ||
| this.globalEndpointManager.PreferredLocationCount); | ||
|
|
||
| this.throttlingRetryPolicy = new ResourceThrottleRetryPolicy( | ||
| maxRetryAttemptsOnThrottledRequests, | ||
| maxRetryWaitTimeInSeconds); | ||
|
|
||
| this.retryContext = new MetadataRetryContext | ||
| { | ||
| RetryLocationIndex = 0, | ||
| RetryRequestOnPreferredLocations = true, | ||
| }; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Decides whether the caller should retry after an exception. A <see cref="CosmosException"/> with status 503 and sub status TransportGenerated503 is retried immediately on the next location index while the service unavailable retry budget lasts; every other case is delegated to the throttling retry policy. | ||
| /// </summary> | ||
| /// <param name="exception">Exception that occurred when the operation was tried</param> | ||
| /// <param name="cancellationToken">An instance of <see cref="CancellationToken"/>.</param> | ||
| /// <returns>A task producing the <see cref="ShouldRetryResult"/> that carries the retry decision.</returns> | ||
| public Task<ShouldRetryResult> ShouldRetryAsync( | ||
| Exception exception, | ||
| CancellationToken cancellationToken) | ||
| { | ||
| if (exception is CosmosException cosmosException | ||
| && cosmosException.StatusCode == HttpStatusCode.ServiceUnavailable | ||
| && cosmosException.Headers.SubStatusCode == SubStatusCodes.TransportGenerated503) | ||
ealsur marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| { | ||
| if (this.IncrementRetryIndexOnServiceUnavailableForMetadataRead()) | ||
| { | ||
| return Task.FromResult(ShouldRetryResult.RetryAfter(TimeSpan.Zero)); | ||
| } | ||
| } | ||
|
|
||
| return this.throttlingRetryPolicy.ShouldRetryAsync(exception, cancellationToken); | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Decides whether the caller should retry, based on the response message returned for the operation. | ||
| /// </summary> | ||
| /// <param name="cosmosResponseMessage">The <see cref="ResponseMessage"/> returned for the request.</param> | ||
| /// <param name="cancellationToken">An instance of <see cref="CancellationToken"/>.</param> | ||
| /// <returns>A task producing the <see cref="ShouldRetryResult"/> that carries the retry decision.</returns> | ||
| public Task<ShouldRetryResult> ShouldRetryAsync( | ||
| ResponseMessage cosmosResponseMessage, | ||
| CancellationToken cancellationToken) | ||
| { | ||
| // A transport generated 503 is retried right away on the next location index, as long as retries remain. | ||
| bool isTransportGenerated503 = cosmosResponseMessage?.StatusCode == HttpStatusCode.ServiceUnavailable | ||
| && cosmosResponseMessage?.Headers?.SubStatusCode == SubStatusCodes.TransportGenerated503; | ||
|
|
||
| if (isTransportGenerated503 && this.IncrementRetryIndexOnServiceUnavailableForMetadataRead()) | ||
| { | ||
| return Task.FromResult(ShouldRetryResult.RetryAfter(TimeSpan.Zero)); | ||
| } | ||
|
|
||
| // Everything else is left to the underlying throttling retry policy. | ||
| return this.throttlingRetryPolicy.ShouldRetryAsync(cosmosResponseMessage, cancellationToken); | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Method that is called before a request is sent to allow the retry policy implementation | ||
| /// to modify the state of the request. It clears any earlier location routing, routes the request by the current retry location index, resolves the endpoint through the global endpoint manager and then routes the request to that endpoint. | ||
| /// </summary> | ||
| /// <param name="request">The request being sent to the service; its request context is modified.</param> | ||
| public void OnBeforeSendRequest(DocumentServiceRequest request) | ||
| { | ||
| // Clear the previous location-based routing directive before applying the one for this attempt. | ||
| request.RequestContext.ClearRouteToLocation(); | ||
ealsur marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| request.RequestContext.RouteToLocation( | ||
| this.retryContext.RetryLocationIndex, | ||
| this.retryContext.RetryRequestOnPreferredLocations); | ||
|
|
||
| Uri metadataLocationEndpoint = this.globalEndpointManager.ResolveServiceEndpoint(request); | ||
|
|
||
| DefaultTrace.TraceInformation("MetadataRequestThrottleRetryPolicy: Routing the metadata request to: {0} for operation type: {1} and resource type: {2}.", metadataLocationEndpoint, request.OperationType, request.ResourceType); | ||
| request.RequestContext.RouteToLocation(metadataLocationEndpoint); | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Consumes one service unavailable retry and, when the retry budget allows it, points the retry context at the next location index. | ||
| /// </summary> | ||
| /// <returns>True when the location index was advanced and a retry may be attempted; false when the maximum retry count was already reached.</returns> | ||
| private bool IncrementRetryIndexOnServiceUnavailableForMetadataRead() | ||
| { | ||
| // The counter is bumped on every call, including the calls that end up being rejected. | ||
| int retriesBeforeThisCall = this.serviceUnavailableRetryCount; | ||
| this.serviceUnavailableRetryCount = retriesBeforeThisCall + 1; | ||
|
|
||
| if (retriesBeforeThisCall < this.maxServiceUnavailableRetryCount) | ||
| { | ||
| // Retry on the next preferred location; the updated retry count doubles as the zero-based location index. | ||
| DefaultTrace.TraceWarning("MetadataRequestThrottleRetryPolicy: Incrementing the metadata retry location index to: {0}.", this.serviceUnavailableRetryCount); | ||
|
|
||
| MetadataRetryContext nextContext = new MetadataRetryContext(); | ||
| nextContext.RetryLocationIndex = this.serviceUnavailableRetryCount; | ||
| nextContext.RetryRequestOnPreferredLocations = true; | ||
| this.retryContext = nextContext; | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
| DefaultTrace.TraceWarning("MetadataRequestThrottleRetryPolicy: Retry count: {0} has exceeded the maximum permitted retry count on service unavailable: {1}.", this.serviceUnavailableRetryCount, this.maxServiceUnavailableRetryCount); | ||
| return false; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// A helper class containing the routing attributes that the enclosing policy | ||
| /// applies to a metadata request before it is sent. | ||
| /// </summary> | ||
| internal sealed class MetadataRetryContext | ||
| { | ||
| /// <summary> | ||
| /// An integer defining the current, zero-based retry location index. | ||
| /// </summary> | ||
| public int RetryLocationIndex { get; set; } | ||
|
|
||
| /// <summary> | ||
| /// A boolean flag indicating if the request should retry on | ||
| /// preferred locations; the enclosing policy always sets it to true. | ||
| /// </summary> | ||
| public bool RetryRequestOnPreferredLocations { get; set; } | ||
| } | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.