Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
0b9a887
PPAF : Separate retry timeouts for Reads and Query
Praveen-Msft Nov 10, 2025
c1bc018
Merge branch 'master' into fix-split-timeout-by-operation-type
Praveen-Msft Nov 10, 2025
f9ec5f6
Code changes to add integration tests. Fixing cross regional retry.
kundadebdatta Nov 11, 2025
7c4cd25
Adjust read timeouts, update test to do single region failover.
Praveen-Msft Nov 11, 2025
d3c4ac7
Merge branch 'master' into fix-split-timeout-by-operation-type
Praveen-Msft Nov 11, 2025
3437f7a
Add diagnostics to the exception message for pipeline debugging purpose
Praveen-Msft Nov 11, 2025
85f4c5c
Merge branch 'fix-split-timeout-by-operation-type' of https://github.…
Praveen-Msft Nov 11, 2025
f3cb0a1
Add logging for debugging the contacted regions update issue.
Praveen-Msft Nov 12, 2025
fdb7595
Code changes to fix set to string conversion.
kundadebdatta Nov 12, 2025
27fedc8
Code changes to address review comments
kundadebdatta Nov 12, 2025
891ff37
Code changes to fix failing tests
kundadebdatta Nov 12, 2025
87f3e93
Adding more logs.
kundadebdatta Nov 12, 2025
300368a
Update the last two timeout to 6s for reads
Praveen-Msft Nov 12, 2025
98abacd
Code changes to skip parallelization for query test.
kundadebdatta Nov 12, 2025
dc81b40
Code changes to remove assertions.
kundadebdatta Nov 12, 2025
8fb590c
Code changes to add tracking
kundadebdatta Nov 12, 2025
b4912ce
Code changes to ignore other PPCB tests.
kundadebdatta Nov 12, 2025
0f0f39e
Code changes to run similar tests sequentially.
kundadebdatta Nov 12, 2025
4597518
Code changes correctly reset environment variable.
kundadebdatta Nov 12, 2025
c382975
Code changes to add SylQuery as a check.
kundadebdatta Nov 12, 2025
7092b25
Update the PPAF policies for point reads and non point reads
Praveen-Msft Nov 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions Microsoft.Azure.Cosmos/src/HttpClient/HttpTimeoutPolicy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,16 @@ public static HttpTimeoutPolicy GetTimeoutPolicy(
// Data Plane Reads.
else if (documentServiceRequest.IsReadOnlyRequest)
{
return isPartitionLevelFailoverEnabled
? HttpTimeoutPolicyForPartitionFailover.InstanceShouldThrow503OnTimeout
: HttpTimeoutPolicyDefault.InstanceShouldThrow503OnTimeout;
if (isPartitionLevelFailoverEnabled)
{
return documentServiceRequest.OperationType == OperationType.Query
? HttpTimeoutPolicyForPartitionFailover.InstanceShouldThrow503OnTimeoutForQuery
: HttpTimeoutPolicyForPartitionFailover.InstanceShouldThrow503OnTimeoutForReads;
}
else
{
return HttpTimeoutPolicyDefault.InstanceShouldThrow503OnTimeout;
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,42 @@ namespace Microsoft.Azure.Cosmos

internal sealed class HttpTimeoutPolicyForPartitionFailover : HttpTimeoutPolicy
{
public static readonly HttpTimeoutPolicy Instance = new HttpTimeoutPolicyForPartitionFailover(false);
public static readonly HttpTimeoutPolicy InstanceShouldThrow503OnTimeout = new HttpTimeoutPolicyForPartitionFailover(true);
public bool isQuery;
public static readonly HttpTimeoutPolicy InstanceShouldThrow503OnTimeoutForQuery = new HttpTimeoutPolicyForPartitionFailover(true);
public static readonly HttpTimeoutPolicy InstanceShouldThrow503OnTimeoutForReads = new HttpTimeoutPolicyForPartitionFailover(false);

public bool shouldThrow503OnTimeout;
private static readonly string Name = nameof(HttpTimeoutPolicyDefault);

private HttpTimeoutPolicyForPartitionFailover(bool shouldThrow503OnTimeout)
private HttpTimeoutPolicyForPartitionFailover(bool isQuery)
{
this.shouldThrow503OnTimeout = shouldThrow503OnTimeout;
this.isQuery = isQuery;
}

private readonly IReadOnlyList<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)> TimeoutsAndDelays = new List<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)>()
// Timeouts and delays are based on the following rationale:
// For reads: 3 aggressive attempts with timeouts of .5s, .5s, and 1s respectively.
// For queries: 3 attempts with timeouts of 5s, 5s, and 10s respectively.
private readonly IReadOnlyList<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)> TimeoutsAndDelaysForReads = new List<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)>()
{
(TimeSpan.FromSeconds(.5), TimeSpan.Zero),
(TimeSpan.FromSeconds(.5), TimeSpan.Zero),
(TimeSpan.FromSeconds(1), TimeSpan.Zero),
};

private readonly IReadOnlyList<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)> TimeoutsAndDelaysForQueries = new List<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)>()
{
(TimeSpan.FromSeconds(5), TimeSpan.Zero),
(TimeSpan.FromSeconds(5), TimeSpan.Zero),
(TimeSpan.FromSeconds(10), TimeSpan.Zero),
};

public override string TimeoutPolicyName => HttpTimeoutPolicyForPartitionFailover.Name;

public override int TotalRetryCount => this.TimeoutsAndDelays.Count;
public override int TotalRetryCount => this.isQuery ? this.TimeoutsAndDelaysForQueries.Count : this.TimeoutsAndDelaysForReads.Count;

public override IEnumerator<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)> GetTimeoutEnumerator()
{
return this.TimeoutsAndDelays.GetEnumerator();
return this.isQuery ? this.TimeoutsAndDelaysForQueries.GetEnumerator() : this.TimeoutsAndDelaysForReads.GetEnumerator();
}

public override bool ShouldRetryBasedOnResponse(HttpMethod requestHttpMethod, HttpResponseMessage responseMessage)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,53 @@ await TestScenarioAsync(
expectedNumberOfRetrys: 3);
}

/// <summary>
/// Verifies that, when partition level failover is enabled, a query request resolves to a
/// timeout policy that yields exactly three attempts with request timeouts of 5s, 5s and 10s.
/// </summary>
[TestMethod]
public void HttpTimeoutPolicyForParitionFailoverForQueries()
{
    // Expected per-attempt request timeouts, in the order the policy must yield them.
    TimeSpan[] expectedRequestTimeouts =
    {
        TimeSpan.FromSeconds(5),
        TimeSpan.FromSeconds(5),
        TimeSpan.FromSeconds(10),
    };

    HttpTimeoutPolicy httpTimeoutPolicyForQuery = HttpTimeoutPolicy.GetTimeoutPolicy(
        documentServiceRequest: CosmosHttpClientCoreTests.CreateDocumentServiceRequestByOperation(ResourceType.Document, OperationType.Query),
        isPartitionLevelFailoverEnabled: true,
        isThinClientEnabled: false);

    int count = 0;

    // IEnumerator<T> is IDisposable, so release it deterministically.
    using (IEnumerator<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)> availableRetries = httpTimeoutPolicyForQuery.GetTimeoutEnumerator())
    {
        while (availableRetries.MoveNext())
        {
            // Fail with a clear message (instead of an index error) if the policy yields extra attempts.
            Assert.IsTrue(
                count < expectedRequestTimeouts.Length,
                "The query timeout policy yielded more attempts than expected.");

            Assert.AreEqual(
                expectedRequestTimeouts[count],
                availableRetries.Current.requestTimeout,
                $"Unexpected request timeout for query attempt {count}.");

            count++;
        }
    }

    // Without this check the test would pass vacuously when the enumerator yields nothing.
    Assert.AreEqual(
        expectedRequestTimeouts.Length,
        count,
        "The query timeout policy yielded fewer attempts than expected.");
}

/// <summary>
/// Verifies that, when partition level failover is enabled, a point read request resolves to a
/// timeout policy that yields exactly three attempts with request timeouts of 0.5s, 0.5s and 1s.
/// </summary>
[TestMethod]
public void HttpTimeoutPolicyForParitionFailoverForReads()
{
    // Expected per-attempt request timeouts, in the order the policy must yield them.
    TimeSpan[] expectedRequestTimeouts =
    {
        TimeSpan.FromMilliseconds(500),
        TimeSpan.FromMilliseconds(500),
        TimeSpan.FromSeconds(1),
    };

    HttpTimeoutPolicy httpTimeoutPolicyForReads = HttpTimeoutPolicy.GetTimeoutPolicy(
        documentServiceRequest: CosmosHttpClientCoreTests.CreateDocumentServiceRequestByOperation(ResourceType.Document, OperationType.Read),
        isPartitionLevelFailoverEnabled: true,
        isThinClientEnabled: false);

    int count = 0;

    // IEnumerator<T> is IDisposable, so release it deterministically.
    using (IEnumerator<(TimeSpan requestTimeout, TimeSpan delayForNextRequest)> availableRetries = httpTimeoutPolicyForReads.GetTimeoutEnumerator())
    {
        while (availableRetries.MoveNext())
        {
            // Fail with a clear message (instead of an index error) if the policy yields extra attempts.
            Assert.IsTrue(
                count < expectedRequestTimeouts.Length,
                "The read timeout policy yielded more attempts than expected.");

            Assert.AreEqual(
                expectedRequestTimeouts[count],
                availableRetries.Current.requestTimeout,
                $"Unexpected request timeout for read attempt {count}.");

            count++;
        }
    }

    // Without this check the test would pass vacuously when the enumerator yields nothing.
    Assert.AreEqual(
        expectedRequestTimeouts.Length,
        count,
        "The read timeout policy yielded fewer attempts than expected.");
}

private static DocumentServiceRequest CreateDocumentServiceRequestByOperation(
ResourceType resourceType,
Expand Down
Loading