Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
2a77217
Initial plan
Copilot Aug 1, 2025
8609dea
Fix InvalidOperationException in CosmosDiagnostics.ToString() due to …
Copilot Aug 1, 2025
ba096bd
Complete fix for InvalidOperationException - handle all trace enumera…
Copilot Aug 1, 2025
f2fd10e
Merge branch 'master' into copilot/fix-5112
FabianMeiswinkel Aug 14, 2025
9a2c674
Update trace walking logic to prevent modifications during walk opera…
Copilot Aug 14, 2025
4e93c98
Remove unused snapshot methods from Trace class
Copilot Aug 14, 2025
001766a
Address reviewer feedback: centralize walking state management in Cos…
Copilot Aug 14, 2025
7c15aa8
Address reviewer feedback: return NoOpTrace and fix locking strategy
Copilot Aug 14, 2025
824d33d
Address reviewer feedback: centralize walking state management and re…
Copilot Aug 15, 2025
dd1ccd7
Some nits manually pushed
kirankumarkolli Aug 15, 2025
4a302d0
Address reviewer feedback: move IsBeingWalked to ITrace interface and…
Copilot Aug 15, 2025
1c0d072
Few nits
kirankumarkolli Aug 15, 2025
e00ed26
Changing the logic to not ignore modifications when materialization h…
FabianMeiswinkel Aug 18, 2025
eba2640
Update Trace.cs
FabianMeiswinkel Aug 18, 2025
c5b66cc
Update SummaryDiagnostics.cs
FabianMeiswinkel Aug 18, 2025
ab1bff2
Update Trace.cs
FabianMeiswinkel Aug 18, 2025
563dd44
Update Trace.cs
FabianMeiswinkel Aug 18, 2025
b221911
Adding Assert on IsBeingWalked
FabianMeiswinkel Aug 18, 2025
6820d2e
Fixing CorrelatedActivityId Tracing
FabianMeiswinkel Aug 18, 2025
130a8c3
Update Trace.cs
FabianMeiswinkel Aug 18, 2025
7b73bbe
Update BatchAsyncOperationContextTests.cs
FabianMeiswinkel Aug 19, 2025
26a05bb
Update BatchAsyncOperationContextTests.cs
FabianMeiswinkel Aug 19, 2025
4a7eeb4
Update PartitionKeyBatchResponseTests.cs
FabianMeiswinkel Aug 19, 2025
b9cd3d6
Update CosmosDiagnosticsUnitTests.cs
FabianMeiswinkel Aug 19, 2025
c8125d3
Update Trace.cs
FabianMeiswinkel Aug 19, 2025
269aeb4
Update RegionFailoverTests.cs
FabianMeiswinkel Aug 19, 2025
07cce9c
Update TraceTests.cs
FabianMeiswinkel Aug 19, 2025
6d64bff
Update TraceTests.cs
FabianMeiswinkel Aug 19, 2025
9109651
Update NoOpTrace.cs
FabianMeiswinkel Aug 19, 2025
2c06779
Update NoOpTrace.cs
FabianMeiswinkel Aug 19, 2025
059a5ce
Fixing test issues
FabianMeiswinkel Aug 19, 2025
d3050b0
Update FullPipelineTests.cs
FabianMeiswinkel Aug 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ public CosmosTraceDiagnostics(ITrace trace)

public override string ToString()
{
if (this.Value is Tracing.Trace rootConcreteTrace)
{
rootConcreteTrace.SetWalkingStateRecursively();
}

return this.ToJsonString();
}

Expand All @@ -50,16 +55,31 @@ public override TimeSpan GetClientElapsedTime()

public override IReadOnlyList<(string regionName, Uri uri)> GetContactedRegions()
{
if (this.Value is Tracing.Trace rootConcreteTrace)
{
rootConcreteTrace.SetWalkingStateRecursively();
}

return this.Value?.Summary?.RegionsContacted;
}

public override ServerSideCumulativeMetrics GetQueryMetrics()
{
if (this.Value is Tracing.Trace rootConcreteTrace)
{
rootConcreteTrace.SetWalkingStateRecursively();
}

return this.accumulatedMetrics.Value;
}

internal bool IsGoneExceptionHit()
{
if (this.Value is Tracing.Trace rootConcreteTrace)
{
rootConcreteTrace.SetWalkingStateRecursively();
}

return this.WalkTraceTreeForGoneException(this.Value);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ public int AwaitedTasks

IReadOnlyDictionary<string, object> ITrace.Data => this.innerTrace.Data;

bool ITrace.IsBeingWalked => this.innerTrace.IsBeingWalked;

public ParallelPrefetchTestConfig(
ArrayPool<IPrefetcher> prefetcherPool,
ArrayPool<Task> taskPool,
Expand Down Expand Up @@ -116,6 +118,11 @@ void IDisposable.Dispose()
{
this.innerTrace.Dispose();
}

bool ITrace.TryGetDatum(string key, out object datum)
{
return this.innerTrace.TryGetDatum(key, out datum);
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ public static void WalkTraceTreeForQueryMetrics(ITrace currentTrace, ServerSideM
return;
}

// Assert that walking state is set
System.Diagnostics.Debug.Assert(currentTrace.IsBeingWalked, "SetWalkingStateRecursively should be set to true");

foreach (object datum in currentTrace.Data.Values)
{
if (datum is QueryMetricsTraceDatum queryMetricsTraceDatum)
Expand All @@ -41,6 +44,9 @@ private static void WalkTraceTreeForPartitionInfo(ITrace currentTrace, ServerSid
return;
}

// Assert that walking state is set
System.Diagnostics.Debug.Assert(currentTrace.IsBeingWalked, "SetWalkingStateRecursively should be set to true");

foreach (Object datum in currentTrace.Data.Values)
{
if (datum is ClientSideRequestStatisticsTraceDatum clientSideRequestStatisticsTraceDatum)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ public override async Task<ResponseMessage> ReadNextAsync(ITrace trace, Cancella

// If Correlated Id already exists and is different, add a new one in comma separated list
// Scenario: A new iterator is created with same ContinuationToken and Trace
if (trace.Data.TryGetValue(QueryIterator.CorrelatedActivityIdKeyName, out object correlatedActivityIds))
if (trace.TryGetDatum(QueryIterator.CorrelatedActivityIdKeyName, out object correlatedActivityIds))
{
List<string> correlatedIdList = correlatedActivityIds.ToString().Split(',').ToList();
if (!correlatedIdList.Contains(this.correlatedActivityId.ToString()))
Expand Down
12 changes: 12 additions & 0 deletions Microsoft.Azure.Cosmos/src/Tracing/ITrace.cs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ interface ITrace : IDisposable
/// </summary>
IReadOnlyDictionary<string, object> Data { get; }

/// <summary>
/// Gets a value indicating whether this trace is currently being walked.
/// </summary>
bool IsBeingWalked { get; }

/// <summary>
/// Starts a Trace and adds it as a child to this instance.
/// </summary>
Expand Down Expand Up @@ -115,5 +120,12 @@ ITrace StartChild(
/// <param name="trace">Existing trace.</param>
void AddChild(ITrace trace);

/// <summary>
/// Tries to get a specific datum - it is safe to call this even before IsBeingWalked is set.
/// </summary>
/// <param name="key">The key to identify the datum.</param>
/// <param name="datum">The datum itself.</param>
/// <returns>A flag indicating whether the datum with this key exists.</returns>
bool TryGetDatum(string key, out object datum);
}
}
8 changes: 8 additions & 0 deletions Microsoft.Azure.Cosmos/src/Tracing/NoOpTrace.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ private NoOpTrace()

public IReadOnlyDictionary<string, object> Data => NoOpData;

public bool IsBeingWalked => true; // this needs to return true to allow materialization of NoOpTrace

public void Dispose()
{
// NoOp
Expand Down Expand Up @@ -86,5 +88,11 @@ public void UpdateRegionContacted(TraceDatum traceDatum)
{
// NoOp
}

bool ITrace.TryGetDatum(string key, out object datum)
{
datum = null;
return false;
}
}
}
139 changes: 126 additions & 13 deletions Microsoft.Azure.Cosmos/src/Tracing/Trace.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,15 @@ namespace Microsoft.Azure.Cosmos.Tracing
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;
using Microsoft.Azure.Cosmos.Tracing.TraceData;
using Microsoft.Azure.Documents;

internal sealed class Trace : ITrace
{
private static readonly IReadOnlyDictionary<string, object> EmptyDictionary = new Dictionary<string, object>();
private readonly List<ITrace> children;
private readonly Lazy<Dictionary<string, object>> data;
private volatile List<ITrace> children;
private volatile Dictionary<string, object> data;
private ValueStopwatch stopwatch;
private volatile bool isBeingWalked;

private Trace(
string name,
Expand All @@ -34,7 +32,7 @@ private Trace(
this.Component = component;
this.Parent = parent;
this.children = new List<ITrace>();
this.data = new Lazy<Dictionary<string, object>>();
this.data = null;
this.Summary = summary ?? throw new ArgumentNullException(nameof(summary));
}

Expand All @@ -54,9 +52,37 @@ private Trace(

public ITrace Parent { get; }

public IReadOnlyList<ITrace> Children => this.children;
// NOTE: no lock necessary here only because this.children is volatile
// and every reference to it is immutable when isBeingWalked == true
// and isBeingWalked is guaranteed to be set to true before this
// Property is called
public IReadOnlyList<ITrace> Children
{
get
{
// Assert that walking state is set
Debug.Assert(this.isBeingWalked, "SetWalkingStateRecursively should be set to true");

return this.children;
}
}

public IReadOnlyDictionary<string, object> Data => this.data.IsValueCreated ? this.data.Value : Trace.EmptyDictionary;
// NOTE: no lock necessary here only because this.data is volatile
// and every reference to it is immutable when isBeingWalked == true
// and isBeingWalked is guaranteed to be set to true before this
// Property is called
public IReadOnlyDictionary<string, object> Data
{
get
{
// Assert that walking state is set
Debug.Assert(this.isBeingWalked, "SetWalkingStateRecursively should be set to true");

return this.data ?? Trace.EmptyDictionary;
}
}

public bool IsBeingWalked => this.isBeingWalked;

public void Dispose()
{
Expand Down Expand Up @@ -90,14 +116,30 @@ public ITrace StartChild(
summary: this.Summary);

this.AddChild(child);

return child;
}

public void AddChild(ITrace child)
{
lock (this.children)
lock (this.Name)
{
this.children.Add(child);
if (!this.isBeingWalked)
{
this.children.Add(child);

return;
}

if (child is Trace traceChild)
{
traceChild.SetWalkingStateRecursively();
}

List<ITrace> writableSnapshot = new List<ITrace>(this.children.Count + 1);
writableSnapshot.AddRange(this.children);
writableSnapshot.Add(child);
this.children = writableSnapshot;
}
}

Expand All @@ -124,18 +166,89 @@ public static Trace GetRootTrace(

public void AddDatum(string key, TraceDatum traceDatum)
{
this.data.Value.Add(key, traceDatum);
this.Summary.UpdateRegionContacted(traceDatum);
this.AddDatum(key, traceDatum as Object);
}

public void AddDatum(string key, object value)
{
this.data.Value.Add(key, value);
lock (this.Name)
{
this.data ??= new Dictionary<string, object>();

if (!this.isBeingWalked)
{
// If materialization has not started yet no cloning is needed
this.data.Add(key, value);
return;
}

this.data = new Dictionary<string, object>(this.data)
{
{ key, value }
};
}
}

public void AddOrUpdateDatum(string key, object value)
{
this.data.Value[key] = value;
lock (this.Name)
{
this.data ??= new Dictionary<string, object>();

if (!this.isBeingWalked)
{
// If materialization has not started yet no cloning is needed
this.data[key] = value;
return; // Ignore modifications while being walked
}

this.data = new Dictionary<string, object>(this.data)
{
[key] = value
};
}
}

internal void SetWalkingStateRecursively()
{
if (this.isBeingWalked)
{
return; // Already set, return early
}

lock (this.Name)
{
if (this.isBeingWalked)
{
return; // Already set, return early
}

foreach (ITrace child in this.children)
{
if (child is Trace concreteChild)
{
concreteChild.SetWalkingStateRecursively();
}
}

// Set the walking state for this trace after processing children
this.isBeingWalked = true;
}
}

bool ITrace.TryGetDatum(string key, out object datum)
{
lock (this.Name)
{
if (this.data == null)
{
datum = null;
return false;
}

return this.data.TryGetValue(key, out datum);
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ namespace Microsoft.Azure.Cosmos.Tracing.TraceData
{
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using Microsoft.Azure.Cosmos.Json;
Expand All @@ -22,7 +23,11 @@ public SummaryDiagnostics(ITrace trace)
= new Lazy<Dictionary<(int, int), int>>(() => new Dictionary<(int, int), int>());
this.AllRegionsContacted
= new Lazy<HashSet<Uri>>(() => new HashSet<Uri>());


if (trace is Tracing.Trace rootConcreteTrace)
{
rootConcreteTrace.SetWalkingStateRecursively();
}
this.CollectSummaryFromTraceTree(trace);
}

Expand All @@ -38,6 +43,9 @@ public SummaryDiagnostics(ITrace trace)

private void CollectSummaryFromTraceTree(ITrace currentTrace)
{
// Assert that walking state is set
Debug.Assert(currentTrace.IsBeingWalked, "SetWalkingStateRecursively should be set to true");

foreach (object datums in currentTrace.Data.Values)
{
if (datums is ClientSideRequestStatisticsTraceDatum clientSideRequestStatisticsTraceDatum)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1865,6 +1865,8 @@ public TraceForBaselineTesting(

public IReadOnlyDictionary<string, object> Data => this.data;

public bool IsBeingWalked => true; // needs to return true to allow materialization

public IReadOnlyList<(string, Uri)> RegionsContacted => new List<(string, Uri)>();

public void AddDatum(string key, TraceDatum traceDatum)
Expand Down Expand Up @@ -1924,6 +1926,11 @@ public void AddOrUpdateDatum(string key, object value)

this.data[key] = "Redacted To Not Change The Baselines From Run To Run";
}

bool ITrace.TryGetDatum(string key, out object datum)
{
return this.data.TryGetValue(key, out datum);
}
}

private sealed class RequestHandlerSleepHelper : RequestHandler
Expand Down
Loading