From 7e9b945712f158e6dd4ddeeff273372be8cad15a Mon Sep 17 00:00:00 2001 From: "Arooshi Avasthy (from Dev Box)" Date: Tue, 14 Oct 2025 10:08:56 -0700 Subject: [PATCH 01/10] Add header for 404/1002 retry requests. --- .../src/ClientRetryPolicy.cs | 17 ++- .../CosmosItemTests.cs | 114 +++++++++++++++++- .../ClientRetryPolicyTests.cs | 48 +++++++- 3 files changed, 172 insertions(+), 7 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index c11c6abd7f..9efb2a07e4 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -22,7 +22,8 @@ internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy { private const int RetryIntervalInMS = 1000; // Once we detect failover wait for 1 second before retrying request. private const int MaxRetryCount = 120; - private const int MaxServiceUnavailableRetryCount = 1; + private const int MaxServiceUnavailableRetryCount = 1; + private const string HubRegionHeader = "x-ms-cosmos-hub-region-processing-only"; private readonly IDocumentClientRetryPolicy throttlingRetry; private readonly GlobalEndpointManager globalEndpointManager; @@ -38,7 +39,8 @@ internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy private bool isMultiMasterWriteRequest; private Uri locationEndpoint; private RetryContext retryContext; - private DocumentServiceRequest documentServiceRequest; + private DocumentServiceRequest documentServiceRequest; + private bool addHubRegionProcessingOnlyHeader; public ClientRetryPolicy( GlobalEndpointManager globalEndpointManager, @@ -222,6 +224,12 @@ public void OnBeforeSendRequest(DocumentServiceRequest request) // set location-based routing directive based on request retry context request.RequestContext.RouteToLocation(this.retryContext.RetryLocationIndex, this.retryContext.RetryRequestOnPreferredLocations); } + } + // If previous attempt failed with 404/1002, add the hub-region-processing-only header + if (this.addHubRegionProcessingOnlyHeader) + { + request.Headers[HubRegionHeader] = bool.TrueString; + this.addHubRegionProcessingOnlyHeader = false; // reset after applying } // Resolve the endpoint for the request and pin the resolution to the resolved endpoint @@ -322,7 +330,8 @@ private async Task ShouldRetryInternalAsync( if (statusCode == HttpStatusCode.NotFound && subStatusCode == SubStatusCodes.ReadSessionNotAvailable) - { + { + this.addHubRegionProcessingOnlyHeader = true; return this.ShouldRetryOnSessionNotAvailable(this.documentServiceRequest); } @@ -338,7 +347,7 @@ private async Task ShouldRetryInternalAsync( || (statusCode == HttpStatusCode.Gone && subStatusCode == SubStatusCodes.LeaseNotFound)) { return this.ShouldRetryOnUnavailableEndpointStatusCodes(); - } + } return null; } diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosItemTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosItemTests.cs index 0cc2eb1eef..c0917bf8d3 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosItemTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosItemTests.cs @@ -20,6 +20,7 @@ namespace Microsoft.Azure.Cosmos.SDK.EmulatorTests using System.Threading.Tasks; using Microsoft.Azure.Cosmos; using Microsoft.Azure.Cosmos.Diagnostics; + using Microsoft.Azure.Cosmos.Handlers; using Microsoft.Azure.Cosmos.Json; using Microsoft.Azure.Cosmos.Query.Core.ExecutionContext; using Microsoft.Azure.Cosmos.Query.Core.QueryClient; @@ -39,7 +40,8 @@ public class CosmosItemTests : BaseCosmosClientHelper { private Container Container = null; private ContainerProperties containerSettings = null; - + + private const string HubRegionHeader = "x-ms-cosmos-hub-region-processing-only"; private static readonly string nonPartitionItemId = "fixed-Container-Item"; private static readonly string undefinedPartitionItemId = "undefined-partition-Item"; @@ -4315,7 +4317,115 @@ private static async Task GivenItemAsyncWhenMissingMemberHandlingIsErrorThenExpe JsonConvert.DefaultSettings = () => default; } - } + } + + [TestMethod] + [Owner("aavasthy")] + [Description("Forces a single 404/1002 from the gateway and verifies ClientRetryPolicy adds x-ms-cosmos-hub-region-processing-only on the retry request.")] + public async Task ReadItemAsync_ShouldAddHubHeader_OnRetryAfter_404_1002() + { + bool headerObservedOnRetry = false; + int requestCount = 0; + bool shouldReturn404 = true; + + // Created HTTP handler to intercept requests + HttpClientHandlerHelper httpHandler = new HttpClientHandlerHelper + { + RequestCallBack = (request, cancellationToken) => + { + // Track all document read requests + if (request.Method == HttpMethod.Get && + request.RequestUri != null && + request.RequestUri.AbsolutePath.Contains("/docs/")) + { + requestCount++; + + // Check for hub header on retry (2nd+ request) + if (requestCount > 1 && + request.Headers.TryGetValues(HubRegionHeader, out IEnumerable values) && + values.Any(v => v.Equals(bool.TrueString, StringComparison.OrdinalIgnoreCase))) + { + headerObservedOnRetry = true; + } + } + + return Task.FromResult(null); + }, + + ResponseIntercepter = (response, request) => + { + if (shouldReturn404 && + request.Method == HttpMethod.Get && + request.RequestUri != null && + request.RequestUri.AbsolutePath.Contains("/docs/")) + { + shouldReturn404 = false; // Only return 404 once + + var errorResponse = new + { + code = "NotFound", + message = "Message: {\"Errors\":[\"Resource Not Found. Learn more: https://aka.ms/cosmosdb-tsg-not-found\"]}\r\nActivityId: " + Guid.NewGuid() + ", Request URI: " + request.RequestUri, + additionalErrorInfo = "" + }; + + HttpResponseMessage notFoundResponse = new HttpResponseMessage(HttpStatusCode.NotFound) + { + Content = new StringContent( + JsonConvert.SerializeObject(errorResponse), + Encoding.UTF8, + "application/json" + ) + }; + + // Add the substatus header for ReadSessionNotAvailable + notFoundResponse.Headers.Add("x-ms-substatus", "1002"); + notFoundResponse.Headers.Add("x-ms-activity-id", Guid.NewGuid().ToString()); + notFoundResponse.Headers.Add("x-ms-request-charge", "1.0"); + + return Task.FromResult(notFoundResponse); + } + + return Task.FromResult(response); + } + }; + + CosmosClientOptions clientOptions = new CosmosClientOptions + { + ConnectionMode = ConnectionMode.Gateway, + ConsistencyLevel = Cosmos.ConsistencyLevel.Session, + HttpClientFactory = () => new HttpClient(httpHandler), + MaxRetryAttemptsOnRateLimitedRequests = 9, + MaxRetryWaitTimeOnRateLimitedRequests = TimeSpan.FromSeconds(30) + }; + + using CosmosClient customClient = TestCommon.CreateCosmosClient(clientOptions); + + Container customContainer = customClient.GetContainer(this.database.Id, this.Container.Id); + + // Create a test item first + ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + await this.Container.CreateItemAsync(testItem, new Cosmos.PartitionKey(testItem.pk)); + + try + { + // This should trigger 404/1002 on first attempt, then retry with hub header + ItemResponse response = await customContainer.ReadItemAsync( + testItem.id, + new Cosmos.PartitionKey(testItem.pk)); + + Assert.IsNotNull(response); + Assert.IsNotNull(response.Resource); + } + catch (CosmosException) + { + // It's possible the retry also fails, but should still have seen the retry attempt + } + + // Verifying retry happened + Assert.IsTrue(requestCount >= 2, $"Expected at least 2 requests (original + retry), but got {requestCount}"); + Assert.IsTrue(headerObservedOnRetry, $"Expected retry request to include '{HubRegionHeader}: true'"); + } + private async Task AutoGenerateIdPatternTest(Cosmos.PartitionKey pk, T itemWithoutId) { diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs index 26ad1e3b88..26eeadbcb4 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs @@ -27,7 +27,8 @@ public sealed class ClientRetryPolicyTests { private static Uri Location1Endpoint = new Uri("https://location1.documents.azure.com"); private static Uri Location2Endpoint = new Uri("https://location2.documents.azure.com"); - + + private const string HubRegionHeader = "x-ms-cosmos-hub-region-processing-only"; private ReadOnlyCollection preferredLocations; private AccountProperties databaseAccount; private GlobalPartitionEndpointManager partitionKeyRangeLocationCache; @@ -400,6 +401,51 @@ public async Task ClientRetryPolicy_NoRetry_MultiMaster_Write_NoPreferredLocatio { await this.ValidateConnectTimeoutTriggersClientRetryPolicyAsync(isReadRequest: false, useMultipleWriteLocations: true, usesPreferredLocations: false, true); } + + [TestMethod] + public async Task ClientRetryPolicy_AddsHubRegionProcessingOnlyHeader_On404_1002() + { + // Arrange + const bool enableEndpointDiscovery = true; + + using GlobalEndpointManager endpointManager = this.Initialize( + useMultipleWriteLocations: true, + enableEndpointDiscovery: enableEndpointDiscovery, + isPreferredLocationsListEmpty: false); + + ClientRetryPolicy retryPolicy = new ClientRetryPolicy( + endpointManager, + this.partitionKeyRangeLocationCache, + new RetryOptions(), + enableEndpointDiscovery, + isThinClientEnabled: false); + + DocumentServiceRequest request1 = this.CreateRequest(isReadRequest: true, isMasterResourceType: false); + + Assert.IsNull(request1.Headers.GetValues(HubRegionHeader), "Header should not exist before any retry."); + + DocumentClientException simulatedException = new DocumentClientException( + message: "Simulated 404/1002 ReadSessionNotAvailable", + innerException: null, + statusCode: HttpStatusCode.NotFound, + substatusCode: SubStatusCodes.ReadSessionNotAvailable, + requestUri: request1.RequestContext.LocationEndpointToRoute, + responseHeaders: new DictionaryNameValueCollection()); + + // Act: policy detects error and sets flag + ShouldRetryResult shouldRetry = await retryPolicy.ShouldRetryAsync(simulatedException, CancellationToken.None); + + retryPolicy.OnBeforeSendRequest(request1); + string[] headerValues = request1.Headers.GetValues(HubRegionHeader); + Assert.IsNotNull(headerValues, "Expected header to be added after 404/1002 retry signal."); + Assert.AreEqual(1, headerValues.Length, "Header should have exactly one value."); + Assert.AreEqual(bool.TrueString, headerValues[0], "Header value should be 'True'."); + + // Header not applied to a new request + DocumentServiceRequest request2 = this.CreateRequest(isReadRequest: true, isMasterResourceType: false); + retryPolicy.OnBeforeSendRequest(request2); + Assert.IsNull(request2.Headers.GetValues(HubRegionHeader), "Header should not be set on a new request after flag is reset."); + } private async Task ValidateConnectTimeoutTriggersClientRetryPolicyAsync( bool isReadRequest, From e523341a8f4e4a8d3056058f1f3a75ab1a29b4e8 Mon Sep 17 00:00:00 2001 From: Arooshi Avasthy Date: Tue, 4 Nov 2025 08:12:55 -0800 Subject: [PATCH 02/10] Update direct package and retry header code --- Directory.Build.props | 2 +- Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Directory.Build.props b/Directory.Build.props index 28e0a26601..308a83da17 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -3,7 +3,7 @@ 3.54.1 3.55.0 preview.1 - 3.41.0 + 3.41.2 1.0.0 beta.0 2.0.5 diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index 9efb2a07e4..75cbbecf71 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -228,8 +228,8 @@ public void OnBeforeSendRequest(DocumentServiceRequest request) // If previous attempt failed with 404/1002, add the hub-region-processing-only header if (this.addHubRegionProcessingOnlyHeader) { - request.Headers[HubRegionHeader] = bool.TrueString; - this.addHubRegionProcessingOnlyHeader = false; // reset after applying + request.Headers.Add(HttpConstants.HttpHeaders.ShouldProcessOnlyInHubRegion, bool.TrueString); + this.addHubRegionProcessingOnlyHeader = false; } // Resolve the endpoint for the request and pin the resolution to the resolved endpoint From 35e0eebc29ba8d846b345ce2cc4d26d932a95ecf Mon Sep 17 00:00:00 2001 From: Arooshi Avasthy Date: Tue, 16 Dec 2025 09:41:37 -0800 Subject: [PATCH 03/10] Code clean up --- Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index 75cbbecf71..b5a096b1da 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -23,7 +23,6 @@ internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy private const int RetryIntervalInMS = 1000; // Once we detect failover wait for 1 second before retrying request. private const int MaxRetryCount = 120; private const int MaxServiceUnavailableRetryCount = 1; - private const string HubRegionHeader = "x-ms-cosmos-hub-region-processing-only"; private readonly IDocumentClientRetryPolicy throttlingRetry; private readonly GlobalEndpointManager globalEndpointManager; From f622ebdbe8cb3b7d71a33e54ace0d8d517ecf829 Mon Sep 17 00:00:00 2001 From: Arooshi Avasthy Date: Thu, 18 Dec 2025 14:55:13 -0800 Subject: [PATCH 04/10] Add not to be used internally check for hubregion header. --- Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index b5a096b1da..2bb1734d7f 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -224,13 +224,14 @@ public void OnBeforeSendRequest(DocumentServiceRequest request) request.RequestContext.RouteToLocation(this.retryContext.RetryLocationIndex, this.retryContext.RetryRequestOnPreferredLocations); } } +#if !INTERNAL // If previous attempt failed with 404/1002, add the hub-region-processing-only header if (this.addHubRegionProcessingOnlyHeader) { request.Headers.Add(HttpConstants.HttpHeaders.ShouldProcessOnlyInHubRegion, bool.TrueString); this.addHubRegionProcessingOnlyHeader = false; } - +#endif // Resolve the endpoint for the request and pin the resolution to the resolved endpoint // This enables marking the endpoint unavailability on endpoint failover/unreachability this.locationEndpoint = this.isThinClientEnabled From 961288fd3648aeb16b1777c6c97d2c8ce1de5fc1 Mon Sep 17 00:00:00 2001 From: Arooshi Avasthy Date: Fri, 19 Dec 2025 10:05:42 -0800 Subject: [PATCH 05/10] Add not to be used internally check for hubregion header. --- Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index 2bb1734d7f..d6cb867f19 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -39,7 +39,9 @@ internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy private Uri locationEndpoint; private RetryContext retryContext; private DocumentServiceRequest documentServiceRequest; +#if !INTERNAL private bool addHubRegionProcessingOnlyHeader; +#endif public ClientRetryPolicy( GlobalEndpointManager globalEndpointManager, @@ -331,7 +333,9 @@ private async Task ShouldRetryInternalAsync( if (statusCode == HttpStatusCode.NotFound && subStatusCode == SubStatusCodes.ReadSessionNotAvailable) { - this.addHubRegionProcessingOnlyHeader = true; +#if !INTERNAL + this.addHubRegionProcessingOnlyHeader = true; +#endif return this.ShouldRetryOnSessionNotAvailable(this.documentServiceRequest); } From 692484cb296c8390d9ccfc3e12b6f3285ad66bb0 Mon Sep 17 00:00:00 2001 From: Arooshi Avasthy Date: Mon, 5 Jan 2026 19:10:27 -0800 Subject: [PATCH 06/10] Made property volatile --- Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index d6cb867f19..2973e7e7a7 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -40,7 +40,7 @@ internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy private RetryContext retryContext; private DocumentServiceRequest documentServiceRequest; #if !INTERNAL - private bool addHubRegionProcessingOnlyHeader; + private volatile bool addHubRegionProcessingOnlyHeader; #endif public ClientRetryPolicy( From 66d481bc20f04033639f20dc869640dda894e712 Mon Sep 17 00:00:00 2001 From: Arooshi Avasthy Date: Fri, 9 Jan 2026 10:37:48 -0800 Subject: [PATCH 07/10] Update retry logic and add header for all subsequent request. --- .../src/ClientRetryPolicy.cs | 69 +++++++++---------- .../ClientRetryPolicyTests.cs | 51 +++++++++----- 2 files changed, 68 insertions(+), 52 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index 2973e7e7a7..c42e8c0b08 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -203,45 +203,44 @@ public async Task ShouldRetryAsync( /// to modify the state of the request. /// /// The request being sent to the service. - public void OnBeforeSendRequest(DocumentServiceRequest request) - { - this.isReadRequest = request.IsReadOnlyRequest; - this.canUseMultipleWriteLocations = this.globalEndpointManager.CanUseMultipleWriteLocations(request); - this.documentServiceRequest = request; - this.isMultiMasterWriteRequest = !this.isReadRequest - && (this.globalEndpointManager?.CanSupportMultipleWriteLocations(request.ResourceType, request.OperationType) ?? false); - - // clear previous location-based routing directive - request.RequestContext.ClearRouteToLocation(); - - if (this.retryContext != null) - { - if (this.retryContext.RouteToHub) - { - request.RequestContext.RouteToLocation(this.globalEndpointManager.GetHubUri()); - } - else - { - // set location-based routing directive based on request retry context - request.RequestContext.RouteToLocation(this.retryContext.RetryLocationIndex, this.retryContext.RetryRequestOnPreferredLocations); - } + public void OnBeforeSendRequest(DocumentServiceRequest request) + { + this.isReadRequest = request.IsReadOnlyRequest; + this.canUseMultipleWriteLocations = this.globalEndpointManager.CanUseMultipleWriteLocations(request); + this.documentServiceRequest = request; + this.isMultiMasterWriteRequest = !this.isReadRequest + && (this.globalEndpointManager?.CanSupportMultipleWriteLocations(request.ResourceType, request.OperationType) ?? false); + + // clear previous location-based routing directive + request.RequestContext.ClearRouteToLocation(); + + if (this.retryContext != null) + { + if (this.retryContext.RouteToHub) + { + request.RequestContext.RouteToLocation(this.globalEndpointManager.GetHubUri()); + } + else + { + // set location-based routing directive based on request retry context + request.RequestContext.RouteToLocation(this.retryContext.RetryLocationIndex, this.retryContext.RetryRequestOnPreferredLocations); + } } -#if !INTERNAL - // If previous attempt failed with 404/1002, add the hub-region-processing-only header +#if !INTERNAL + // If previous attempt failed with 404/1002, add the hub-region-processing-only header to all subsequent retry attempts if (this.addHubRegionProcessingOnlyHeader) { - request.Headers.Add(HttpConstants.HttpHeaders.ShouldProcessOnlyInHubRegion, bool.TrueString); - this.addHubRegionProcessingOnlyHeader = false; - } + request.Headers[HttpConstants.HttpHeaders.ShouldProcessOnlyInHubRegion] = bool.TrueString; + } #endif - // Resolve the endpoint for the request and pin the resolution to the resolved endpoint - // This enables marking the endpoint unavailability on endpoint failover/unreachability - this.locationEndpoint = this.isThinClientEnabled - && GatewayStoreModel.IsOperationSupportedByThinClient(request) - ? this.globalEndpointManager.ResolveThinClientEndpoint(request) - : this.globalEndpointManager.ResolveServiceEndpoint(request); - - request.RequestContext.RouteToLocation(this.locationEndpoint); + // Resolve the endpoint for the request and pin the resolution to the resolved endpoint + // This enables marking the endpoint unavailability on endpoint failover/unreachability + this.locationEndpoint = this.isThinClientEnabled + && GatewayStoreModel.IsOperationSupportedByThinClient(request) + ? this.globalEndpointManager.ResolveThinClientEndpoint(request) + : this.globalEndpointManager.ResolveServiceEndpoint(request); + + request.RequestContext.RouteToLocation(this.locationEndpoint); } private async Task ShouldRetryInternalAsync( diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs index 26eeadbcb4..927220f3c8 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs @@ -403,7 +403,9 @@ public async Task ClientRetryPolicy_NoRetry_MultiMaster_Write_NoPreferredLocatio } [TestMethod] - public async Task ClientRetryPolicy_AddsHubRegionProcessingOnlyHeader_On404_1002() + [DataRow(true, DisplayName = "Read request - Hub region header persists across retries after 404/1002")] + [DataRow(false, DisplayName = "Write request - Hub region header persists across retries after 404/1002")] + public async Task ClientRetryPolicy_HubRegionHeader_AddedOn404_1002_AndPersistsAcrossRetries(bool isReadRequest) { // Arrange const bool enableEndpointDiscovery = true; @@ -420,31 +422,46 @@ public async Task ClientRetryPolicy_AddsHubRegionProcessingOnlyHeader_On404_1002 enableEndpointDiscovery, isThinClientEnabled: false); - DocumentServiceRequest request1 = this.CreateRequest(isReadRequest: true, isMasterResourceType: false); + DocumentServiceRequest request = this.CreateRequest(isReadRequest: isReadRequest, isMasterResourceType: false); - Assert.IsNull(request1.Headers.GetValues(HubRegionHeader), "Header should not exist before any retry."); + // First attempt - header should not exist + retryPolicy.OnBeforeSendRequest(request); + Assert.IsNull(request.Headers.GetValues(HubRegionHeader), "Header should not exist on initial request before any 404/1002 error."); - DocumentClientException simulatedException = new DocumentClientException( + // Simulate 404/1002 error + DocumentClientException sessionNotAvailableException = new DocumentClientException( message: "Simulated 404/1002 ReadSessionNotAvailable", innerException: null, statusCode: HttpStatusCode.NotFound, substatusCode: SubStatusCodes.ReadSessionNotAvailable, - requestUri: request1.RequestContext.LocationEndpointToRoute, + requestUri: request.RequestContext.LocationEndpointToRoute, responseHeaders: new DictionaryNameValueCollection()); - // Act: policy detects error and sets flag - ShouldRetryResult shouldRetry = await retryPolicy.ShouldRetryAsync(simulatedException, CancellationToken.None); - - retryPolicy.OnBeforeSendRequest(request1); - string[] headerValues = request1.Headers.GetValues(HubRegionHeader); - Assert.IsNotNull(headerValues, "Expected header to be added after 404/1002 retry signal."); - Assert.AreEqual(1, headerValues.Length, "Header should have exactly one value."); - Assert.AreEqual(bool.TrueString, headerValues[0], "Header value should be 'True'."); + ShouldRetryResult shouldRetry = await retryPolicy.ShouldRetryAsync(sessionNotAvailableException, CancellationToken.None); + Assert.IsTrue(shouldRetry.ShouldRetry, "Should retry on 404/1002."); - // Header not applied to a new request - DocumentServiceRequest request2 = this.CreateRequest(isReadRequest: true, isMasterResourceType: false); - retryPolicy.OnBeforeSendRequest(request2); - Assert.IsNull(request2.Headers.GetValues(HubRegionHeader), "Header should not be set on a new request after flag is reset."); + // Verify header is added and persists across multiple retry attempts + for (int retryAttempt = 1; retryAttempt <= 3; retryAttempt++) + { + retryPolicy.OnBeforeSendRequest(request); + string[] headerValues = request.Headers.GetValues(HubRegionHeader); + Assert.IsNotNull(headerValues, $"Header should be present on retry attempt {retryAttempt}."); + Assert.AreEqual(1, headerValues.Length, $"Header should have exactly one value on retry attempt {retryAttempt}."); + Assert.AreEqual(bool.TrueString, headerValues[0], $"Header value should be 'True' on retry attempt {retryAttempt}."); + + if (retryAttempt < 3) + { + DocumentClientException serviceUnavailableException = new DocumentClientException( + message: "Simulated 503 ServiceUnavailable", + innerException: null, + statusCode: HttpStatusCode.ServiceUnavailable, + substatusCode: SubStatusCodes.Unknown, + requestUri: request.RequestContext.LocationEndpointToRoute, + responseHeaders: new DictionaryNameValueCollection()); + + await retryPolicy.ShouldRetryAsync(serviceUnavailableException, CancellationToken.None); + } + } } private async Task ValidateConnectTimeoutTriggersClientRetryPolicyAsync( From 7936adaf8107007f5aed2794709568425671ac18 Mon Sep 17 00:00:00 2001 From: Arooshi Avasthy Date: Fri, 9 Jan 2026 10:58:51 -0800 Subject: [PATCH 08/10] Correct formatting --- .../src/ClientRetryPolicy.cs | 66 +++++++++---------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index c42e8c0b08..0fc27cf8f2 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -22,7 +22,7 @@ internal sealed class ClientRetryPolicy : IDocumentClientRetryPolicy { private const int RetryIntervalInMS = 1000; // Once we detect failover wait for 1 second before retrying request. private const int MaxRetryCount = 120; - private const int MaxServiceUnavailableRetryCount = 1; + private const int MaxServiceUnavailableRetryCount = 1; private readonly IDocumentClientRetryPolicy throttlingRetry; private readonly GlobalEndpointManager globalEndpointManager; @@ -203,28 +203,28 @@ public async Task ShouldRetryAsync( /// to modify the state of the request. /// /// The request being sent to the service. - public void OnBeforeSendRequest(DocumentServiceRequest request) - { - this.isReadRequest = request.IsReadOnlyRequest; - this.canUseMultipleWriteLocations = this.globalEndpointManager.CanUseMultipleWriteLocations(request); - this.documentServiceRequest = request; - this.isMultiMasterWriteRequest = !this.isReadRequest - && (this.globalEndpointManager?.CanSupportMultipleWriteLocations(request.ResourceType, request.OperationType) ?? false); - - // clear previous location-based routing directive - request.RequestContext.ClearRouteToLocation(); - - if (this.retryContext != null) - { - if (this.retryContext.RouteToHub) - { - request.RequestContext.RouteToLocation(this.globalEndpointManager.GetHubUri()); - } - else - { - // set location-based routing directive based on request retry context - request.RequestContext.RouteToLocation(this.retryContext.RetryLocationIndex, this.retryContext.RetryRequestOnPreferredLocations); - } + public void OnBeforeSendRequest(DocumentServiceRequest request) + { + this.isReadRequest = request.IsReadOnlyRequest; + this.canUseMultipleWriteLocations = this.globalEndpointManager.CanUseMultipleWriteLocations(request); + this.documentServiceRequest = request; + this.isMultiMasterWriteRequest = !this.isReadRequest + && (this.globalEndpointManager?.CanSupportMultipleWriteLocations(request.ResourceType, request.OperationType) ?? false); + + // clear previous location-based routing directive + request.RequestContext.ClearRouteToLocation(); + + if (this.retryContext != null) + { + if (this.retryContext.RouteToHub) + { + request.RequestContext.RouteToLocation(this.globalEndpointManager.GetHubUri()); + } + else + { + // set location-based routing directive based on request retry context + request.RequestContext.RouteToLocation(this.retryContext.RetryLocationIndex, this.retryContext.RetryRequestOnPreferredLocations); + } } #if !INTERNAL // If previous attempt failed with 404/1002, add the hub-region-processing-only header to all subsequent retry attempts @@ -233,14 +233,14 @@ public void OnBeforeSendRequest(DocumentServiceRequest request) request.Headers[HttpConstants.HttpHeaders.ShouldProcessOnlyInHubRegion] = bool.TrueString; } #endif - // Resolve the endpoint for the request and pin the resolution to the resolved endpoint - // This enables marking the endpoint unavailability on endpoint failover/unreachability - this.locationEndpoint = this.isThinClientEnabled - && GatewayStoreModel.IsOperationSupportedByThinClient(request) - ? this.globalEndpointManager.ResolveThinClientEndpoint(request) - : this.globalEndpointManager.ResolveServiceEndpoint(request); - - request.RequestContext.RouteToLocation(this.locationEndpoint); + // Resolve the endpoint for the request and pin the resolution to the resolved endpoint + // This enables marking the endpoint unavailability on endpoint failover/unreachability + this.locationEndpoint = this.isThinClientEnabled + && GatewayStoreModel.IsOperationSupportedByThinClient(request) + ? this.globalEndpointManager.ResolveThinClientEndpoint(request) + : this.globalEndpointManager.ResolveServiceEndpoint(request); + + request.RequestContext.RouteToLocation(this.locationEndpoint); } private async Task ShouldRetryInternalAsync( @@ -334,7 +334,7 @@ private async Task ShouldRetryInternalAsync( { #if !INTERNAL this.addHubRegionProcessingOnlyHeader = true; -#endif +#endif return this.ShouldRetryOnSessionNotAvailable(this.documentServiceRequest); } @@ -350,7 +350,7 @@ private async Task ShouldRetryInternalAsync( || (statusCode == HttpStatusCode.Gone && subStatusCode == SubStatusCodes.LeaseNotFound)) { return this.ShouldRetryOnUnavailableEndpointStatusCodes(); - } + } return null; } From 6bffa82cf3bcc40896cba4ee06aeb5c60e635afd Mon Sep 17 00:00:00 2001 From: Arooshi Avasthy Date: Wed, 21 Jan 2026 21:04:25 -0800 Subject: [PATCH 09/10] Update the check to work only for single master. --- .../src/ClientRetryPolicy.cs | 8 +- .../ClientRetryPolicyTests.cs | 138 ++++++++++-------- 2 files changed, 84 insertions(+), 62 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index 0fc27cf8f2..1b480d4798 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -331,9 +331,13 @@ private async Task ShouldRetryInternalAsync( if (statusCode == HttpStatusCode.NotFound && subStatusCode == SubStatusCodes.ReadSessionNotAvailable) - { + { #if !INTERNAL - this.addHubRegionProcessingOnlyHeader = true; + // Only set the hub region processing header for single master accounts + if (!this.canUseMultipleWriteLocations) + { + this.addHubRegionProcessingOnlyHeader = true; + } #endif return this.ShouldRetryOnSessionNotAvailable(this.documentServiceRequest); } diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs index 927220f3c8..eef1f84672 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs @@ -402,66 +402,84 @@ public async Task ClientRetryPolicy_NoRetry_MultiMaster_Write_NoPreferredLocatio await this.ValidateConnectTimeoutTriggersClientRetryPolicyAsync(isReadRequest: false, useMultipleWriteLocations: true, usesPreferredLocations: false, true); } - [TestMethod] - [DataRow(true, DisplayName = "Read request - Hub region header persists across retries after 404/1002")] - [DataRow(false, DisplayName = "Write request - Hub region header persists across retries after 404/1002")] - public async Task ClientRetryPolicy_HubRegionHeader_AddedOn404_1002_AndPersistsAcrossRetries(bool isReadRequest) - { - // Arrange - const bool enableEndpointDiscovery = true; - - using GlobalEndpointManager endpointManager = this.Initialize( - useMultipleWriteLocations: true, - enableEndpointDiscovery: enableEndpointDiscovery, - isPreferredLocationsListEmpty: false); - - ClientRetryPolicy retryPolicy = new ClientRetryPolicy( - endpointManager, - this.partitionKeyRangeLocationCache, - new RetryOptions(), - enableEndpointDiscovery, - isThinClientEnabled: false); - - DocumentServiceRequest request = this.CreateRequest(isReadRequest: isReadRequest, isMasterResourceType: false); - - // First attempt - header should not exist - retryPolicy.OnBeforeSendRequest(request); - Assert.IsNull(request.Headers.GetValues(HubRegionHeader), "Header should not exist on initial request before any 404/1002 error."); - - // Simulate 404/1002 error - DocumentClientException sessionNotAvailableException = new DocumentClientException( - message: "Simulated 404/1002 ReadSessionNotAvailable", - innerException: null, - statusCode: HttpStatusCode.NotFound, - substatusCode: SubStatusCodes.ReadSessionNotAvailable, - requestUri: request.RequestContext.LocationEndpointToRoute, - responseHeaders: new DictionaryNameValueCollection()); - - ShouldRetryResult shouldRetry = await retryPolicy.ShouldRetryAsync(sessionNotAvailableException, CancellationToken.None); - Assert.IsTrue(shouldRetry.ShouldRetry, "Should retry on 404/1002."); - - // Verify header is added and persists across multiple retry attempts - for (int retryAttempt = 1; retryAttempt <= 3; retryAttempt++) - { - retryPolicy.OnBeforeSendRequest(request); - string[] headerValues = request.Headers.GetValues(HubRegionHeader); - Assert.IsNotNull(headerValues, $"Header should be present on retry attempt {retryAttempt}."); - Assert.AreEqual(1, headerValues.Length, $"Header should have exactly one value on retry attempt {retryAttempt}."); - Assert.AreEqual(bool.TrueString, headerValues[0], $"Header value should be 'True' on retry attempt {retryAttempt}."); - - if (retryAttempt < 3) - { - DocumentClientException serviceUnavailableException = new DocumentClientException( - message: "Simulated 503 ServiceUnavailable", - innerException: null, - statusCode: HttpStatusCode.ServiceUnavailable, - substatusCode: SubStatusCodes.Unknown, - requestUri: request.RequestContext.LocationEndpointToRoute, - responseHeaders: new DictionaryNameValueCollection()); - - await retryPolicy.ShouldRetryAsync(serviceUnavailableException, CancellationToken.None); - } - } + /// + /// Test to validate that hub region header is added on 404/1002 for single master accounts only, + /// and persists across retries. For multi-master accounts, the header should NOT be added. + /// + [TestMethod] + [DataRow(true, true, DisplayName = "Read request on single master - Hub region header added on 404/1002")] + [DataRow(false, true, DisplayName = "Write request on single master - Hub region header added on 404/1002")] + [DataRow(true, false, DisplayName = "Read request on multi-master - Hub region header NOT added on 404/1002")] + [DataRow(false, false, DisplayName = "Write request on multi-master - Hub region header NOT added on 404/1002")] + public async Task ClientRetryPolicy_HubRegionHeader_AddedOn404_1002_BasedOnAccountType(bool isReadRequest, bool isSingleMaster) + { + // Arrange + const bool enableEndpointDiscovery = true; + + using GlobalEndpointManager endpointManager = this.Initialize( + useMultipleWriteLocations: !isSingleMaster, + enableEndpointDiscovery: enableEndpointDiscovery, + isPreferredLocationsListEmpty: false, + enforceSingleMasterSingleWriteLocation: isSingleMaster); + + ClientRetryPolicy retryPolicy = new ClientRetryPolicy( + endpointManager, + this.partitionKeyRangeLocationCache, + new RetryOptions(), + enableEndpointDiscovery, + isThinClientEnabled: false); + + DocumentServiceRequest request = this.CreateRequest(isReadRequest: isReadRequest, isMasterResourceType: false); + + // First attempt - header should not exist + retryPolicy.OnBeforeSendRequest(request); + Assert.IsNull(request.Headers.GetValues(HubRegionHeader), "Header should not exist on initial request before any 404/1002 error."); + + // Simulate 404/1002 error + DocumentClientException sessionNotAvailableException = new DocumentClientException( + message: "Simulated 404/1002 ReadSessionNotAvailable", + innerException: null, + statusCode: HttpStatusCode.NotFound, + substatusCode: SubStatusCodes.ReadSessionNotAvailable, + requestUri: request.RequestContext.LocationEndpointToRoute, + responseHeaders: new DictionaryNameValueCollection()); + + ShouldRetryResult shouldRetry = await retryPolicy.ShouldRetryAsync(sessionNotAvailableException, CancellationToken.None); + Assert.IsTrue(shouldRetry.ShouldRetry, "Should retry on 404/1002."); + + // Verify header behavior based on account type and that it persists across multiple retry attempts + for (int retryAttempt = 1; retryAttempt <= 3; retryAttempt++) + { + retryPolicy.OnBeforeSendRequest(request); + string[] headerValues = request.Headers.GetValues(HubRegionHeader); + + if (isSingleMaster) + { + // For single master accounts, header should be present and persist across retries + Assert.IsNotNull(headerValues, $"Header should be present on retry attempt {retryAttempt} for single master account."); + Assert.AreEqual(1, headerValues.Length, $"Header should have exactly one value on retry attempt {retryAttempt}."); + Assert.AreEqual(bool.TrueString, headerValues[0], $"Header value should be 'True' on retry attempt {retryAttempt}."); + } + else + { + // For multi-master accounts, header should NOT be present + Assert.IsNull(headerValues, $"Header should NOT be present on retry attempt {retryAttempt} for multi-master account."); + } + + if (retryAttempt < 3) + { + // Simulate another error to trigger next retry + DocumentClientException serviceUnavailableException = new DocumentClientException( + message: "Simulated 503 ServiceUnavailable", + innerException: null, + statusCode: HttpStatusCode.ServiceUnavailable, + substatusCode: SubStatusCodes.Unknown, + requestUri: request.RequestContext.LocationEndpointToRoute, + responseHeaders: new DictionaryNameValueCollection()); + + await retryPolicy.ShouldRetryAsync(serviceUnavailableException, CancellationToken.None); + } + } } private async Task ValidateConnectTimeoutTriggersClientRetryPolicyAsync( From b7b3a37e67803009000c9d25bccb4ba2c3a0dde5 Mon Sep 17 00:00:00 2001 From: Arooshi Avasthy Date: Fri, 23 Jan 2026 14:19:44 -0800 Subject: [PATCH 10/10] Update retry header logic --- .../src/ClientRetryPolicy.cs | 16 +- .../CosmosItemTests.cs | 209 +++++++++--------- .../ClientRetryPolicyTests.cs | 112 +++++++--- 3 files changed, 191 insertions(+), 146 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs index 1b480d4798..3e81c62d0d 100644 --- a/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs +++ b/Microsoft.Azure.Cosmos/src/ClientRetryPolicy.cs @@ -327,22 +327,22 @@ private async Task ShouldRetryInternalAsync( markBothReadAndWriteAsUnavailable: false, forceRefresh: false, retryOnPreferredLocations: true); - } - - if (statusCode == HttpStatusCode.NotFound - && subStatusCode == SubStatusCodes.ReadSessionNotAvailable) + } + + if (statusCode == HttpStatusCode.NotFound && subStatusCode == SubStatusCodes.ReadSessionNotAvailable) { #if !INTERNAL // Only set the hub region processing header for single master accounts - if (!this.canUseMultipleWriteLocations) + // Set header only after the first retry attempt fails with 404/1002 + if (!this.canUseMultipleWriteLocations && this.sessionTokenRetryCount >= 1) { this.addHubRegionProcessingOnlyHeader = true; } #endif return this.ShouldRetryOnSessionNotAvailable(this.documentServiceRequest); - } - - // Received 503 due to client connect timeout or Gateway + } + + // Received 503 due to client connect timeout or Gateway if (statusCode == HttpStatusCode.ServiceUnavailable) { return this.TryMarkEndpointUnavailableForPkRangeAndRetryOnServiceUnavailable( diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosItemTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosItemTests.cs index afd5b89baf..e3a0233461 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosItemTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosItemTests.cs @@ -4319,113 +4319,110 @@ private static async Task GivenItemAsyncWhenMissingMemberHandlingIsErrorThenExpe } } - [TestMethod] - [Owner("aavasthy")] - [Description("Forces a single 404/1002 from the gateway and verifies ClientRetryPolicy adds x-ms-cosmos-hub-region-processing-only on the retry request.")] - public async Task ReadItemAsync_ShouldAddHubHeader_OnRetryAfter_404_1002() - { - bool headerObservedOnRetry = false; - int requestCount = 0; - bool shouldReturn404 = true; - - // Created HTTP handler to intercept requests - HttpClientHandlerHelper httpHandler = new HttpClientHandlerHelper - { - RequestCallBack = (request, cancellationToken) => - { - // Track all document read requests - if (request.Method == HttpMethod.Get && - request.RequestUri != null && - request.RequestUri.AbsolutePath.Contains("/docs/")) - { - requestCount++; - - // Check for hub header on retry (2nd+ request) - if (requestCount > 1 && - request.Headers.TryGetValues(HubRegionHeader, out IEnumerable values) && - values.Any(v => v.Equals(bool.TrueString, StringComparison.OrdinalIgnoreCase))) - { - headerObservedOnRetry = true; - } - } - - return Task.FromResult(null); - }, - - ResponseIntercepter = (response, request) => - { - if (shouldReturn404 && - request.Method == HttpMethod.Get && - request.RequestUri != null && - request.RequestUri.AbsolutePath.Contains("/docs/")) - { - shouldReturn404 = false; // Only return 404 once - - var errorResponse = new - { - code = "NotFound", - message = "Message: {\"Errors\":[\"Resource Not Found. Learn more: https://aka.ms/cosmosdb-tsg-not-found\"]}\r\nActivityId: " + Guid.NewGuid() + ", Request URI: " + request.RequestUri, - additionalErrorInfo = "" - }; - - HttpResponseMessage notFoundResponse = new HttpResponseMessage(HttpStatusCode.NotFound) - { - Content = new StringContent( - JsonConvert.SerializeObject(errorResponse), - Encoding.UTF8, - "application/json" - ) - }; - - // Add the substatus header for ReadSessionNotAvailable - notFoundResponse.Headers.Add("x-ms-substatus", "1002"); - notFoundResponse.Headers.Add("x-ms-activity-id", Guid.NewGuid().ToString()); - notFoundResponse.Headers.Add("x-ms-request-charge", "1.0"); - - return Task.FromResult(notFoundResponse); - } - - return Task.FromResult(response); - } - }; - - CosmosClientOptions clientOptions = new CosmosClientOptions - { - ConnectionMode = ConnectionMode.Gateway, - ConsistencyLevel = Cosmos.ConsistencyLevel.Session, - HttpClientFactory = () => new HttpClient(httpHandler), - MaxRetryAttemptsOnRateLimitedRequests = 9, - MaxRetryWaitTimeOnRateLimitedRequests = TimeSpan.FromSeconds(30) - }; - - using CosmosClient customClient = TestCommon.CreateCosmosClient(clientOptions); - - Container customContainer = customClient.GetContainer(this.database.Id, this.Container.Id); - - // Create a test item first - ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); - await this.Container.CreateItemAsync(testItem, new Cosmos.PartitionKey(testItem.pk)); - - try - { - // This should trigger 404/1002 on first attempt, then retry with hub header - ItemResponse response = await customContainer.ReadItemAsync( - testItem.id, - new Cosmos.PartitionKey(testItem.pk)); - - Assert.IsNotNull(response); - Assert.IsNotNull(response.Resource); - } - catch (CosmosException) - { - // It's possible the retry also fails, but should still have seen the retry attempt - } - - // Verifying retry happened - Assert.IsTrue(requestCount >= 2, $"Expected at least 2 requests (original + retry), but got {requestCount}"); - Assert.IsTrue(headerObservedOnRetry, $"Expected retry request to include '{HubRegionHeader}: true'"); + [TestMethod] + [Owner("aavasthy")] + [Description("Forces two consecutive 404/1002 responses from the gateway and verifies ClientRetryPolicy sets the hub region header flag after the first retry fails.")] + public async Task ReadItemAsync_ShouldAddHubHeader_OnRetryAfter_404_1002() + { + int requestCount = 0; + int return404Count = 0; + const int maxReturn404 = 2; // Return 404/1002 twice + + // Created HTTP handler to intercept requests + HttpClientHandlerHelper httpHandler = new HttpClientHandlerHelper + { + RequestCallBack = (request, cancellationToken) => + { + // Track all document read requests + if (request.Method == HttpMethod.Get && + request.RequestUri != null && + request.RequestUri.AbsolutePath.Contains("/docs/")) + { + requestCount++; + + // Header should NOT be present on first retry (2nd request) + if (requestCount == 2 && + request.Headers.TryGetValues(HubRegionHeader, out IEnumerable firstRetryValues) && + firstRetryValues.Any()) + { + Assert.Fail("Header should NOT be present on first retry attempt."); + } + + // Return fake 404/1002 for first two requests + if (return404Count < maxReturn404) + { + return404Count++; + + var errorResponse = new + { + code = "NotFound", + message = "Message: {\"Errors\":[\"Resource Not Found. Learn more: https://aka.ms/cosmosdb-tsg-not-found\"]}\r\nActivityId: " + Guid.NewGuid() + ", Request URI: " + request.RequestUri, + additionalErrorInfo = "" + }; + + HttpResponseMessage notFoundResponse = new HttpResponseMessage(HttpStatusCode.NotFound) + { + Content = new StringContent( + JsonConvert.SerializeObject(errorResponse), + Encoding.UTF8, + "application/json" + ) + }; + + // Add the substatus header for ReadSessionNotAvailable + notFoundResponse.Headers.Add("x-ms-substatus", "1002"); + notFoundResponse.Headers.Add("x-ms-activity-id", Guid.NewGuid().ToString()); + notFoundResponse.Headers.Add("x-ms-request-charge", "1.0"); + + return Task.FromResult(notFoundResponse); + } + } + + return Task.FromResult(null); + } + }; + + CosmosClientOptions clientOptions = new CosmosClientOptions + { + ConnectionMode = ConnectionMode.Gateway, + ConsistencyLevel = Cosmos.ConsistencyLevel.Session, + HttpClientFactory = () => new HttpClient(httpHandler), + MaxRetryAttemptsOnRateLimitedRequests = 9, + MaxRetryWaitTimeOnRateLimitedRequests = TimeSpan.FromSeconds(30) + }; + + using CosmosClient customClient = TestCommon.CreateCosmosClient(clientOptions); + + Container customContainer = customClient.GetContainer(this.database.Id, this.Container.Id); + + // Create a test item first + ToDoActivity testItem = ToDoActivity.CreateRandomToDoActivity(); + await this.Container.CreateItemAsync(testItem, new Cosmos.PartitionKey(testItem.pk)); + + try + { + // This should trigger 404/1002 twice + // In single-region emulator, after first retry fails with 404/1002, it won't retry again + ItemResponse response = await customContainer.ReadItemAsync( + testItem.id, + new Cosmos.PartitionKey(testItem.pk)); + + Assert.Fail("Expected CosmosException due to consecutive 404/1002 failures."); + } + catch (CosmosException ex) + { + // Expected: After first retry fails with 404/1002, single master won't retry again + Assert.AreEqual(HttpStatusCode.NotFound, ex.StatusCode); + Assert.AreEqual((int)SubStatusCodes.ReadSessionNotAvailable, ex.SubStatusCode); + } + + // Verify the expected behavior: + // 1. Initial request (requestCount = 1) fails with 404/1002 + // 2. First retry (requestCount = 2) fails with 404/1002 + // 3. No more retries because single master + no additional regions + Assert.AreEqual(2, requestCount, $"Expected exactly 2 requests (initial + 1 retry) for single-region emulator, but got {requestCount}"); + Assert.AreEqual(2, return404Count, "Both requests should have returned 404/1002"); } - private async Task AutoGenerateIdPatternTest(Cosmos.PartitionKey pk, T itemWithoutId) { diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs index eef1f84672..01beceec22 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/ClientRetryPolicyTests.cs @@ -404,13 +404,14 @@ public async Task ClientRetryPolicy_NoRetry_MultiMaster_Write_NoPreferredLocatio /// /// Test to validate that hub region header is added on 404/1002 for single master accounts only, - /// and persists across retries. For multi-master accounts, the header should NOT be added. + /// starting from the second retry (after first retry also fails). For multi-master accounts, + /// the header should NOT be added. /// [TestMethod] - [DataRow(true, true, DisplayName = "Read request on single master - Hub region header added on 404/1002")] - [DataRow(false, true, DisplayName = "Write request on single master - Hub region header added on 404/1002")] - [DataRow(true, false, DisplayName = "Read request on multi-master - Hub region header NOT added on 404/1002")] - [DataRow(false, false, DisplayName = "Write request on multi-master - Hub region header NOT added on 404/1002")] + [DataRow(true, true, DisplayName = "Read request on single master - Hub region header added after first retry fails")] + [DataRow(false, true, DisplayName = "Write request on single master - Hub region header added after first retry fails")] + [DataRow(true, false, DisplayName = "Read request on multi-master - Hub region header NOT added")] + [DataRow(false, false, DisplayName = "Write request on multi-master - Hub region header NOT added")] public async Task ClientRetryPolicy_HubRegionHeader_AddedOn404_1002_BasedOnAccountType(bool isReadRequest, bool isSingleMaster) { // Arrange @@ -435,7 +436,7 @@ public async Task ClientRetryPolicy_HubRegionHeader_AddedOn404_1002_BasedOnAccou retryPolicy.OnBeforeSendRequest(request); Assert.IsNull(request.Headers.GetValues(HubRegionHeader), "Header should not exist on initial request before any 404/1002 error."); - // Simulate 404/1002 error + // Simulate first 404/1002 error DocumentClientException sessionNotAvailableException = new DocumentClientException( message: "Simulated 404/1002 ReadSessionNotAvailable", innerException: null, @@ -447,37 +448,84 @@ public async Task ClientRetryPolicy_HubRegionHeader_AddedOn404_1002_BasedOnAccou ShouldRetryResult shouldRetry = await retryPolicy.ShouldRetryAsync(sessionNotAvailableException, CancellationToken.None); Assert.IsTrue(shouldRetry.ShouldRetry, "Should retry on 404/1002."); - // Verify header behavior based on account type and that it persists across multiple retry attempts - for (int retryAttempt = 1; retryAttempt <= 3; retryAttempt++) + // First retry attempt - header should NOT be present yet + retryPolicy.OnBeforeSendRequest(request); + string[] headerValues = request.Headers.GetValues(HubRegionHeader); + Assert.IsNull(headerValues, "Header should NOT be present on first retry attempt (before it fails)."); + + // Simulate first retry also failing with 404/1002 + DocumentClientException sessionNotAvailableException2 = new DocumentClientException( + message: "Simulated 404/1002 ReadSessionNotAvailable on first retry", + innerException: null, + statusCode: HttpStatusCode.NotFound, + substatusCode: SubStatusCodes.ReadSessionNotAvailable, + requestUri: request.RequestContext.LocationEndpointToRoute, + responseHeaders: new DictionaryNameValueCollection()); + + shouldRetry = await retryPolicy.ShouldRetryAsync(sessionNotAvailableException2, CancellationToken.None); + + if (isSingleMaster) + { + // For single master, after one retry fails with 404/1002, it won't retry further + // But the header flag should be set for any potential future retries due to other errors + Assert.IsFalse(shouldRetry.ShouldRetry, "Single master should not retry again after first 404/1002 retry fails."); + + // The header flag should be set even though no more 404/1002 retries will happen + // This ensures if the request is retried for a different reason (e.g., 503), it will have the header + } + else { - retryPolicy.OnBeforeSendRequest(request); - string[] headerValues = request.Headers.GetValues(HubRegionHeader); + // Multi-master can retry across multiple regions + Assert.IsTrue(shouldRetry.ShouldRetry, "Multi-master should continue retrying on 404/1002."); + } - if (isSingleMaster) - { - // For single master accounts, header should be present and persist across retries - Assert.IsNotNull(headerValues, $"Header should be present on retry attempt {retryAttempt} for single master account."); - Assert.AreEqual(1, headerValues.Length, $"Header should have exactly one value on retry attempt {retryAttempt}."); - Assert.AreEqual(bool.TrueString, headerValues[0], $"Header value should be 'True' on retry attempt {retryAttempt}."); - } - else + // For single master: Verify header would be added if request is retried for other reasons (e.g., 503) + // For multi-master: Verify header is NOT added even on subsequent retries + if (isSingleMaster) + { + // Simulate a 503 error to trigger another retry + DocumentClientException serviceUnavailableException = new DocumentClientException( + message: "Simulated 503 ServiceUnavailable", + innerException: null, + statusCode: HttpStatusCode.ServiceUnavailable, + substatusCode: SubStatusCodes.Unknown, + requestUri: request.RequestContext.LocationEndpointToRoute, + responseHeaders: new DictionaryNameValueCollection()); + + shouldRetry = await retryPolicy.ShouldRetryAsync(serviceUnavailableException, CancellationToken.None); + + if (shouldRetry.ShouldRetry) { - // For multi-master accounts, header should NOT be present - Assert.IsNull(headerValues, $"Header should NOT be present on retry attempt {retryAttempt} for multi-master account."); + // Now verify the header is present on this retry triggered by 503 + retryPolicy.OnBeforeSendRequest(request); + headerValues = request.Headers.GetValues(HubRegionHeader); + Assert.IsNotNull(headerValues, "Header should be present on retry after 404/1002 flag was set."); + Assert.AreEqual(1, headerValues.Length, "Header should have exactly one value."); + Assert.AreEqual(bool.TrueString, headerValues[0], "Header value should be 'True'."); } - - if (retryAttempt < 3) + } + else + { + // For multi-master: Verify header is NOT added even on subsequent retries + for (int retryAttempt = 2; retryAttempt <= 3; retryAttempt++) { - // Simulate another error to trigger next retry - DocumentClientException serviceUnavailableException = new DocumentClientException( - message: "Simulated 503 ServiceUnavailable", - innerException: null, - statusCode: HttpStatusCode.ServiceUnavailable, - substatusCode: SubStatusCodes.Unknown, - requestUri: request.RequestContext.LocationEndpointToRoute, - responseHeaders: new DictionaryNameValueCollection()); - - await retryPolicy.ShouldRetryAsync(serviceUnavailableException, CancellationToken.None); + if (shouldRetry.ShouldRetry) + { + retryPolicy.OnBeforeSendRequest(request); + headerValues = request.Headers.GetValues(HubRegionHeader); + Assert.IsNull(headerValues, $"Header should NOT be present on retry attempt {retryAttempt} for multi-master account."); + + // Simulate another 404/1002 or 503 to continue retry loop + DocumentClientException nextException = new DocumentClientException( + message: $"Simulated error on retry {retryAttempt}", + innerException: null, + statusCode: retryAttempt % 2 == 0 ? HttpStatusCode.ServiceUnavailable : HttpStatusCode.NotFound, + substatusCode: retryAttempt % 2 == 0 ? SubStatusCodes.Unknown : SubStatusCodes.ReadSessionNotAvailable, + requestUri: request.RequestContext.LocationEndpointToRoute, + responseHeaders: new DictionaryNameValueCollection()); + + shouldRetry = await retryPolicy.ShouldRetryAsync(nextException, CancellationToken.None); + } } } }