Skip to content

Commit 36df172

Browse files
committed
simpler sampler logic
1 parent 340ac5c commit 36df172

File tree

2 files changed

+90
-44
lines changed

2 files changed

+90
-44
lines changed

Microsoft.Azure.Cosmos/src/Telemetry/Sampler/DataSampler.cs

Lines changed: 44 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ namespace Microsoft.Azure.Cosmos.Telemetry
1010
using System.Linq;
1111
using Microsoft.Azure.Cosmos.Query.Core.Pipeline.CrossPartition.OrderBy;
1212
using Microsoft.Azure.Cosmos.Telemetry.Models;
13+
using Newtonsoft.Json.Linq;
1314

1415
/// <summary>
1516
/// Sampler to select top N unique records and return true/false on the basis of elements already selected.
@@ -22,77 +23,76 @@ public static List<RequestInfo> OrderAndSample(List<RequestInfo> requestInfoList
2223
List<RequestInfo> sampledData = new List<RequestInfo>(capacity: requestInfoList.Count);
2324

2425
// Processing (Grouping, Sorting will happen in this collection)
25-
IDictionary<int, List<KeyValuePair<double, RequestInfo>>> sampledRawData = new Dictionary<int, List<KeyValuePair<double, RequestInfo>>>();
26+
IDictionary<int, List<RequestInfo>> sampledRawData = new Dictionary<int, List<RequestInfo>>();
2627

2728
foreach (RequestInfo requestInfo in requestInfoList)
2829
{
2930
// Get a unique key identifier for an object
3031
int key = requestInfo.GetHashCodeForSampler();
31-
32-
// Check if similar object is already present
33-
if (sampledRawData.TryGetValue(key, out List<KeyValuePair<double, RequestInfo>> sortedData))
32+
33+
// Check whether a similar object is already present; if so, add to its list, otherwise create a new list containing it
34+
if (sampledRawData.TryGetValue(key, out List<RequestInfo> groupedData))
3435
{
35-
// Add the new object to the list
36-
DataSampler.AddToList(orderBy, requestInfo, sortedData);
37-
38-
sortedData.Sort(DataComparer.Instance);
39-
40-
if (sortedData.Count > ClientTelemetryOptions.NetworkRequestsSampleSizeThreshold)
41-
{
42-
sortedData.RemoveAt(sortedData.Count - 1);
43-
}
44-
sampledRawData[key] = sortedData;
36+
groupedData.Add(requestInfo);
37+
sampledRawData[key] = groupedData;
4538
}
4639
else
4740
{
48-
// Create a new list of KeyValue pair where we will be sorting this list by the key and Value is original Request Info object
49-
// In this case key can be duplicated as latency and samplecount can be same for different scenario, hence using KeyValuePair to store this info
50-
List<KeyValuePair<double, RequestInfo>> newSortedData
51-
= new List<KeyValuePair<double, RequestInfo>>(ClientTelemetryOptions.NetworkRequestsSampleSizeThreshold + 1);
52-
53-
DataSampler.AddToList(orderBy, requestInfo, newSortedData);
54-
55-
sampledRawData.Add(key, newSortedData);
41+
sampledRawData.Add(key, new List<RequestInfo>() { requestInfo });
5642
}
5743
}
5844

59-
foreach (List<KeyValuePair<double, RequestInfo>> sampledRequestInfo in sampledRawData.Values)
45+
// Get the comparer for the requested ordering
46+
IComparer<RequestInfo> comparer = DataSampler.GetComparer(orderBy);
47+
48+
// If the list is larger than the threshold, sort it and take the top N objects; otherwise add the list as it is
49+
foreach (List<RequestInfo> sampledRequestInfo in sampledRawData.Values)
6050
{
61-
foreach (KeyValuePair<double, RequestInfo> pair in sampledRequestInfo)
51+
if (sampledRequestInfo.Count > ClientTelemetryOptions.NetworkRequestsSampleSizeThreshold)
6252
{
63-
sampledData.Add(pair.Value);
53+
sampledRequestInfo.Sort(comparer);
54+
sampledData.AddRange(sampledRequestInfo.GetRange(
55+
index: 0,
56+
count: ClientTelemetryOptions.NetworkRequestsSampleSizeThreshold));
57+
}
58+
else
59+
{
60+
sampledData.AddRange(sampledRequestInfo);
6461
}
6562
}
6663

6764
return sampledData;
6865
}
69-
70-
private static void AddToList(DataSamplerOrderBy orderBy, RequestInfo requestInfo, List<KeyValuePair<double, RequestInfo>> sortedData)
66+
67+
private static IComparer<RequestInfo> GetComparer(DataSamplerOrderBy orderBy)
7168
{
72-
double valueToStore;
73-
if (orderBy == DataSamplerOrderBy.Latency)
69+
switch (orderBy)
7470
{
75-
valueToStore = requestInfo.GetP99Latency();
71+
case DataSamplerOrderBy.Latency:
72+
return DataLatencyComparer.Instance;
73+
case DataSamplerOrderBy.SampleCount:
74+
return DataSampleCountComparer.Instance;
75+
default:
76+
throw new ArgumentException("order by not supported. Only Supported values are Latency, SampleCount");
7677
}
77-
else if (orderBy == DataSamplerOrderBy.SampleCount)
78-
{
79-
valueToStore = requestInfo.GetSampleCount();
80-
}
81-
else
82-
{
83-
throw new Exception("order by not supported. Only Supported values are Latency, SampleCount");
84-
}
85-
86-
sortedData.Add(new KeyValuePair<double, RequestInfo>(valueToStore, requestInfo));
78+
}
79+
}
80+
81+
internal class DataLatencyComparer : IComparer<RequestInfo>
82+
{
83+
public static DataLatencyComparer Instance = new DataLatencyComparer();
84+
public int Compare(RequestInfo a, RequestInfo b)
85+
{
86+
return b.GetP99Latency().CompareTo(a.GetP99Latency());
8787
}
8888
}
8989

90-
internal class DataComparer : IComparer<KeyValuePair<double, RequestInfo>>
90+
internal class DataSampleCountComparer : IComparer<RequestInfo>
9191
{
92-
public static DataComparer Instance = new DataComparer();
93-
public int Compare(KeyValuePair<double, RequestInfo> a, KeyValuePair<double, RequestInfo> b)
92+
public static DataSampleCountComparer Instance = new DataSampleCountComparer();
93+
public int Compare(RequestInfo a, RequestInfo b)
9494
{
95-
return b.Key.CompareTo(a.Key);
95+
return b.GetSampleCount().CompareTo(a.GetSampleCount());
9696
}
9797
}
9898

Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Telemetry/DataSamplerTests.cs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,5 +69,51 @@ public void TestNetworkRequestSamplerWithoutData()
6969
Assert.AreEqual(0, DataSampler.OrderAndSample(requestInfoList, DataSamplerOrderBy.SampleCount).Count);
7070
Assert.AreEqual(0, DataSampler.OrderAndSample(requestInfoList, DataSamplerOrderBy.Latency).Count);
7171
}
72+
73+
[TestMethod]
74+
public void TestNetworkRequestSamplerForLessThanThresholdSize()
75+
{
76+
int numberOfElementsInEachGroup = ClientTelemetryOptions.NetworkRequestsSampleSizeThreshold;
77+
int numberOfGroups = 3;
78+
79+
List<RequestInfo> requestInfoList = new List<RequestInfo>();
80+
81+
for (int counter = 0; counter < 10; counter++)
82+
{
83+
RequestInfo requestInfo = new RequestInfo()
84+
{
85+
DatabaseName = "dbId " + (counter % numberOfGroups), // To repeat similar elements
86+
ContainerName = "containerId",
87+
Uri = "rntbd://host/partition/replica",
88+
StatusCode = 429,
89+
SubStatusCode = 1002,
90+
Resource = ResourceType.Document.ToResourceTypeString(),
91+
Operation = OperationType.Create.ToOperationTypeString(),
92+
Metrics = new List<MetricInfo>()
93+
{
94+
new MetricInfo(ClientTelemetryOptions.RequestLatencyName, ClientTelemetryOptions.RequestLatencyUnit)
95+
{
96+
Percentiles = new Dictionary<double, double>()
97+
{
98+
{ ClientTelemetryOptions.Percentile50, 10 },
99+
{ ClientTelemetryOptions.Percentile90, 20 },
100+
{ ClientTelemetryOptions.Percentile95, 30 },
101+
{ ClientTelemetryOptions.Percentile99, Random.Shared.Next(1, 100) },
102+
{ ClientTelemetryOptions.Percentile999, 50 }
103+
},
104+
Count = Random.Shared.Next(1, 100)
105+
}
106+
}
107+
};
108+
requestInfoList.Add(requestInfo);
109+
}
110+
111+
List<RequestInfo> sampleDataByLatency = DataSampler.OrderAndSample(requestInfoList, DataSamplerOrderBy.Latency);
112+
Assert.AreEqual(10, sampleDataByLatency.Count);
113+
114+
List<RequestInfo> sampleDataBySampleCount = DataSampler.OrderAndSample(requestInfoList, DataSamplerOrderBy.SampleCount);
115+
Assert.AreEqual(10, sampleDataBySampleCount.Count);
116+
}
117+
72118
}
73119
}

0 commit comments

Comments
 (0)