diff --git a/Microsoft.Azure.Cosmos/src/Resource/CosmosExceptions/CosmosOperationCanceledException.cs b/Microsoft.Azure.Cosmos/src/Resource/CosmosExceptions/CosmosOperationCanceledException.cs index 4ed0ac8fb4..72a0f41233 100644 --- a/Microsoft.Azure.Cosmos/src/Resource/CosmosExceptions/CosmosOperationCanceledException.cs +++ b/Microsoft.Azure.Cosmos/src/Resource/CosmosExceptions/CosmosOperationCanceledException.cs @@ -19,11 +19,13 @@ namespace Microsoft.Azure.Cosmos /// [Serializable] public class CosmosOperationCanceledException : OperationCanceledException, ICloneable - { + { + private readonly Object thisLock = new object(); private readonly OperationCanceledException originalException; - private readonly Lazy lazyMessage; - private readonly Lazy toStringMessage; - private readonly bool tokenCancellationRequested; + private readonly bool tokenCancellationRequested; + + private string lazyMessage; + private string toStringMessage; /// /// Create an instance of CosmosOperationCanceledException @@ -38,8 +40,8 @@ public CosmosOperationCanceledException( this.originalException = originalException ?? throw new ArgumentNullException(nameof(originalException)); this.Diagnostics = diagnostics ?? 
throw new ArgumentNullException(nameof(diagnostics)); this.tokenCancellationRequested = originalException.CancellationToken.IsCancellationRequested; - this.toStringMessage = this.CreateToStringMessage(); - this.lazyMessage = this.CreateLazyMessage(); + this.toStringMessage = null; + this.lazyMessage = null; } internal CosmosOperationCanceledException( @@ -61,8 +63,8 @@ internal CosmosOperationCanceledException( } this.Diagnostics = new CosmosTraceDiagnostics(trace); this.tokenCancellationRequested = originalException.CancellationToken.IsCancellationRequested; - this.toStringMessage = this.CreateToStringMessage(); - this.lazyMessage = this.CreateLazyMessage(); + this.toStringMessage = null; + this.lazyMessage = null; } /// @@ -75,8 +77,8 @@ protected CosmosOperationCanceledException(SerializationInfo info, StreamingCont { this.originalException = (OperationCanceledException)info.GetValue("originalException", typeof(OperationCanceledException)); this.tokenCancellationRequested = (bool)info.GetValue("tokenCancellationRequested", typeof(bool)); - this.lazyMessage = new Lazy(() => (string)info.GetValue("lazyMessage", typeof(string))); - this.toStringMessage = new Lazy(() => (string)info.GetValue("toStringMessage", typeof(string))); + this.lazyMessage = null; + this.toStringMessage = null; //Diagnostics cannot be serialized this.Diagnostics = new CosmosTraceDiagnostics(NoOpTrace.Singleton); } @@ -89,7 +91,7 @@ public override string Source } /// - public override string Message => this.lazyMessage.Value; + public override string Message => this.EnsureLazyMessage(); /// #pragma warning disable CDX1002 // DontUseExceptionStackTrace @@ -120,18 +122,42 @@ public override Exception GetBaseException() /// public override string ToString() { - return this.toStringMessage.Value; + return this.EnsureToStringMessage(skipDiagnostics: false); } - private Lazy CreateLazyMessage() - { - return new Lazy(() => $"{this.originalException.Message}{Environment.NewLine}Cancellation Token has 
expired: {this.tokenCancellationRequested}. Learn more at: https://aka.ms/cosmosdb-tsg-request-timeout{Environment.NewLine}CosmosDiagnostics: {this.Diagnostics}"); - } - private Lazy CreateToStringMessage() - { - return new Lazy(() => $"{this.originalException}{Environment.NewLine}Cancellation Token has expired: {this.tokenCancellationRequested}. Learn more at: https://aka.ms/cosmosdb-tsg-request-timeout{Environment.NewLine}CosmosDiagnostics: {this.Diagnostics}"); - } - + private string EnsureLazyMessage() + { + if (this.lazyMessage != null) + { + return this.lazyMessage; + } + + lock (this.thisLock) + { + return this.lazyMessage ??= + $"{this.originalException.Message}{Environment.NewLine}Cancellation Token has expired: {this.tokenCancellationRequested}. Learn more at: https://aka.ms/cosmosdb-tsg-request-timeout{Environment.NewLine}CosmosDiagnostics: {this.Diagnostics}"; + } + } + + internal string EnsureToStringMessage(bool skipDiagnostics) + { + if (this.toStringMessage != null) + { + return this.toStringMessage; + } + + lock (this.thisLock) + { + if (skipDiagnostics) + { + return this.toStringMessage ??= + $"{this.originalException}{Environment.NewLine}Cancellation Token has expired: {this.tokenCancellationRequested}. Learn more at: https://aka.ms/cosmosdb-tsg-request-timeout"; + } + return this.toStringMessage ??= + $"{this.originalException}{Environment.NewLine}Cancellation Token has expired: {this.tokenCancellationRequested}. 
Learn more at: https://aka.ms/cosmosdb-tsg-request-timeout{Environment.NewLine}CosmosDiagnostics: {this.Diagnostics}"; + } + } + /// /// RecordOtelAttributes /// @@ -159,8 +185,8 @@ public override void GetObjectData(SerializationInfo info, StreamingContext cont info.AddValue("originalException", this.originalException); #pragma warning restore CDX1000 // DontConvertExceptionToObject info.AddValue("tokenCancellationRequested", this.tokenCancellationRequested); - info.AddValue("lazyMessage", this.lazyMessage.Value); - info.AddValue("toStringMessage", this.toStringMessage.Value); + info.AddValue("lazyMessage", this.EnsureLazyMessage()); + info.AddValue("toStringMessage", this.EnsureToStringMessage(skipDiagnostics: false)); } /// diff --git a/Microsoft.Azure.Cosmos/src/Tracing/TraceWriter.TraceJsonWriter.cs b/Microsoft.Azure.Cosmos/src/Tracing/TraceWriter.TraceJsonWriter.cs index f3880a517c..8a2eabc13b 100644 --- a/Microsoft.Azure.Cosmos/src/Tracing/TraceWriter.TraceJsonWriter.cs +++ b/Microsoft.Azure.Cosmos/src/Tracing/TraceWriter.TraceJsonWriter.cs @@ -130,9 +130,14 @@ private static void WriteTraceDatum(IJsonWriter writer, object value) { writer.WriteStringValue(stringValue); } - else - { - writer.WriteStringValue(value.ToString()); + else if (value is CosmosOperationCanceledException cosmosTimeoutException) + { + writer.WriteStringValue( + cosmosTimeoutException.EnsureToStringMessage(skipDiagnostics: true)); + } + else + { + writer.WriteStringValue(value.ToString()); } } diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosOperationCanceledExceptionTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosOperationCanceledExceptionTests.cs index 52c1aab8b0..4c8735d73b 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosOperationCanceledExceptionTests.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CosmosOperationCanceledExceptionTests.cs @@ -7,6 
+7,8 @@ namespace Microsoft.Azure.Cosmos.SDK.EmulatorTests using System; using System.Threading; using System.Threading.Tasks; + using Microsoft.Azure.Cosmos.Diagnostics; + using Microsoft.Azure.Cosmos.Tracing; using Microsoft.VisualStudio.TestTools.UnitTesting; [TestClass] @@ -76,6 +78,32 @@ public async Task CheckCancellationTokenDirectTestAsync() await this.CheckCancellationTokenTestAsync(this.Container, cancellationTokenSource.Token); } + [TestMethod] + public void CheckToJsonStringConversion() + { + // This test is reproducing an issue that an internal customer faced after enabling + // cross region hedging. + // In their scenario they occasionally hit e2e timeouts as CosmosOperationCanceledException. + // The JSON transformation of the CosmosOperationCanceledException in this case could run into + // an issue (InvalidOperationException due to the Lazy factory hitting a cyclic dependency + // when neither the diagnostics nor the exception had materialized the JSON string yet) + // because the CosmosOperationCanceledException is added as a trace datum to the ITrace of + // the CosmosTraceDiagnostics instance. + // The test below reproduced the issue - and is kept here to validate the fix and + // as a regression test. + using ITrace outerTrace = Trace.GetRootTrace("cyclicDependencyReproOuter"); + using ITrace innerTrace = outerTrace.StartChild("cyclicDependencyReproInner"); + + OperationCanceledException innerTimeout = new OperationCanceledException(); + CosmosOperationCanceledException innerCosmosTimeoutException = new CosmosOperationCanceledException( + innerTimeout, + innerTrace); + CosmosOperationCanceledException outerCosmosTimeoutException = new CosmosOperationCanceledException( + innerCosmosTimeoutException, + outerTrace); + + Console.WriteLine(outerCosmosTimeoutException.ToString()); + } private async Task CheckCancellationTokenTestAsync( Container container,