diff --git a/.chloggen/fix-exp-hist-aggregation-offsets.yaml b/.chloggen/fix-exp-hist-aggregation-offsets.yaml new file mode 100644 index 0000000000000..4cef6f95be8e5 --- /dev/null +++ b/.chloggen/fix-exp-hist-aggregation-offsets.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: coreinternal/aggregateutil + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: "Aggregate exponential histogram data points when different offsets are present" + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [42412] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. 
+# Default: '[user]'
+change_logs: []
diff --git a/internal/coreinternal/aggregateutil/aggregate.go b/internal/coreinternal/aggregateutil/aggregate.go
index b344abacd59c6..ea71b3ec2ca24 100644
--- a/internal/coreinternal/aggregateutil/aggregate.go
+++ b/internal/coreinternal/aggregateutil/aggregate.go
@@ -303,25 +303,24 @@ func mergeExponentialHistogramDataPoints(dpsMap map[string]pmetric.ExponentialHi
 			if dp.HasMax() && dp.Max() < dps.At(i).Max() {
 				dp.SetMax(dps.At(i).Max())
 			}
-			// Merge bucket counts.
-			// Note that groupExponentialHistogramDataPoints() has already ensured that we only try
-			// to merge exponential histograms with matching Scale and Positive/Negative Offsets,
-			// so the corresponding array items in BucketCounts have the same bucket boundaries.
-			// However, the number of buckets may differ depending on what values have been observed.
-			for b := 0; b < dps.At(i).Negative().BucketCounts().Len(); b++ {
-				if b < negatives.Len() {
-					negatives.SetAt(b, negatives.At(b)+dps.At(i).Negative().BucketCounts().At(b))
-				} else {
-					negatives.Append(dps.At(i).Negative().BucketCounts().At(b))
-				}
+
+			// Check if offsets are different, indicating we need to adjust dp offsets and bucket counts after merging
+			negOffsetDiff := dp.Negative().Offset() != dps.At(i).Negative().Offset()
+			posOffsetDiff := dp.Positive().Offset() != dps.At(i).Positive().Offset()
+
+			mergeExponentialHistogramBuckets(negatives, dps.At(i).Negative().BucketCounts(), dp.Negative().Offset(), dps.At(i).Negative().Offset())
+			mergeExponentialHistogramBuckets(positives, dps.At(i).Positive().BucketCounts(), dp.Positive().Offset(), dps.At(i).Positive().Offset())
+
+			// Only adjust offsets and remove leading zero buckets if we had different offsets and the buckets are not empty
+			if negOffsetDiff && dps.At(i).Negative().BucketCounts().Len() != 0 {
+				newNegOff := min(dp.Negative().Offset(), dps.At(i).Negative().Offset())
+				dp.Negative().SetOffset(newNegOff + trimBuckets(negatives))
 			}
-			for b := 0; b < dps.At(i).Positive().BucketCounts().Len(); b++ {
-				if b < positives.Len() {
-					positives.SetAt(b, positives.At(b)+dps.At(i).Positive().BucketCounts().At(b))
-				} else {
-					positives.Append(dps.At(i).Positive().BucketCounts().At(b))
-				}
+			if posOffsetDiff && dps.At(i).Positive().BucketCounts().Len() != 0 {
+				newPosOff := min(dp.Positive().Offset(), dps.At(i).Positive().Offset())
+				dp.Positive().SetOffset(newPosOff + trimBuckets(positives))
 			}
+
 			dps.At(i).Exemplars().MoveAndAppendTo(dp.Exemplars())
 			if dps.At(i).StartTimestamp() < dp.StartTimestamp() {
 				dp.SetStartTimestamp(dps.At(i).StartTimestamp())
@@ -330,6 +329,66 @@
 	}
 }
 
+// mergeExponentialHistogramBuckets adds the src bucket counts into tgt,
+// taking both offsets into account. On return tgt is aligned to
+// min(tgtOff, srcOff); the caller is responsible for updating the offset.
+func mergeExponentialHistogramBuckets(tgt, src pcommon.UInt64Slice, tgtOff, srcOff int32) {
+	// Nothing to merge. Returning early also avoids shifting tgt when the
+	// caller will not update the offset (it only does so for non-empty src).
+	if src.Len() == 0 {
+		return
+	}
+
+	// Source covers lower values: shift tgt right by the offset gap so both
+	// slices are aligned at srcOff, keeping any zero-filled gap in between.
+	if srcOff < tgtOff {
+		shift := int(tgtOff - srcOff)
+		oldLen := tgt.Len()
+		for i := 0; i < shift; i++ {
+			tgt.Append(0)
+		}
+		for i := oldLen - 1; i >= 0; i-- {
+			tgt.SetAt(i+shift, tgt.At(i))
+		}
+		for i := 0; i < shift; i++ {
+			tgt.SetAt(i, 0)
+		}
+	}
+
+	// Element-wise addition: src bucket b lands at index b plus the offset
+	// gap (the gap is zero when srcOff <= tgtOff thanks to the shift above).
+	base := 0
+	if srcOff > tgtOff {
+		base = int(srcOff - tgtOff)
+	}
+	for b := 0; b < src.Len(); b++ {
+		idx := base + b
+		for tgt.Len() <= idx {
+			tgt.Append(0)
+		}
+		tgt.SetAt(idx, tgt.At(idx)+src.At(b))
+	}
+}
+
+// trimBuckets removes leading zero-valued buckets and returns the number
+// removed so the caller can advance the bucket offset by the same amount.
+func trimBuckets(buckets pcommon.UInt64Slice) int32 {
+	zeroCount := 0
+	for i := 0; i < buckets.Len() && buckets.At(i) == 0; i++ {
+		zeroCount++
+	}
+	if zeroCount == 0 {
+		return 0
+	}
+
+	newBuckets := make([]uint64, buckets.Len()-zeroCount)
+	for i := zeroCount; i < buckets.Len(); i++ {
+		newBuckets[i-zeroCount] = buckets.At(i)
+	}
+	buckets.FromRaw(newBuckets)
+	return int32(zeroCount)
+}
+
 func groupNumberDataPoints(dps pmetric.NumberDataPointSlice, useStartTime bool,
 	dpsByAttrsAndTs map[string]pmetric.NumberDataPointSlice,
 ) {
@@ -373,9 +432,8 @@ func groupExponentialHistogramDataPoints(dps pmetric.ExponentialHistogramDataPoi
 ) {
 	for i := 0; i < dps.Len(); i++ {
 		dp := dps.At(i)
-		keyHashParts := make([]any, 0, 5)
-		keyHashParts = append(keyHashParts, dp.Scale(), dp.HasMin(), dp.HasMax(), uint32(dp.Flags()), dp.Negative().Offset(),
-			dp.Positive().Offset())
+		keyHashParts := make([]any, 0, 4)
+		keyHashParts = append(keyHashParts, dp.Scale(), dp.HasMin(), dp.HasMax(), uint32(dp.Flags()))
 		if useStartTime {
 			keyHashParts = append(keyHashParts, dp.StartTimestamp().String())
 		}
diff --git a/internal/coreinternal/aggregateutil/aggregate_test.go b/internal/coreinternal/aggregateutil/aggregate_test.go
index a4e736e66302e..f83ea2d7c5fc1 100644
--- a/internal/coreinternal/aggregateutil/aggregate_test.go
+++ b/internal/coreinternal/aggregateutil/aggregate_test.go
@@ -275,7 +275,7 @@ func Test_GroupDataPoints(t *testing.T) {
 
 	hashHistogram := dataPointHashKey(mapAttr, pcommon.NewTimestampFromTime(time.Time{}), false, false, 0)
 
-	hashExpHistogram := dataPointHashKey(mapAttr, pcommon.NewTimestampFromTime(time.Time{}), 0, false, false, 0, 0, 0)
+	hashExpHistogram := dataPointHashKey(mapAttr, pcommon.NewTimestampFromTime(time.Time{}), 0, false, false, 0)
 
 	tests := []struct {
 		name string
@@ -395,7 +395,7 @@ func Test_MergeDataPoints(t *testing.T) {
 
 	hashHistogram := dataPointHashKey(mapAttr, pcommon.NewTimestampFromTime(time.Time{}), false, false, 0)
 
-	hashExpHistogram := dataPointHashKey(mapAttr, pcommon.NewTimestampFromTime(time.Time{}), 0, false, false, 0, 0, 0)
+	hashExpHistogram := dataPointHashKey(mapAttr, pcommon.NewTimestampFromTime(time.Time{}), 0, false, false, 0)
 
 	tests := []struct {
 		name string
@@ -499,6 +499,44 @@ func Test_MergeDataPoints(t 
*testing.T) { return m }, }, + { + name: "exp histogram with different offsets", + aggGroup: AggGroups{ + expHistogram: map[string]pmetric.ExponentialHistogramDataPointSlice{ + hashExpHistogram: testDataExpHistogramWithDifferentOffsets(), + }, + }, + typ: Sum, + want: func() pmetric.Metric { + m := pmetric.NewMetric() + s := m.SetEmptyExponentialHistogram() + d := s.DataPoints().AppendEmpty() + d.Attributes().PutStr("attr1", "val1") + d.SetCount(12) + d.SetSum(0) + d.SetZeroCount(3) + // First data point: positive offset 0, buckets [1, 2] + // Second data point: positive offset 1, buckets [3, 4] (represents [0, 3, 4]) + // Third data point: positive offset 5, buckets [1] (represents [0, 0, 0, 0, 0, 1]) + // Result: [1+0+0, 2+3+0, 0+4+0, 0+0+0, 0+0+0, 0+0+1] = [1, 5, 4, 0, 0, 1] + d.Positive().BucketCounts().Append(1, 5, 4, 0, 0, 1) + d.Positive().SetOffset(0) + + // First data point: negative offset 3, buckets [1, 2] (represents [0, 0, 0, 1, 2]) + // Second data point: negative offset 1, buckets [5, 6] (represents [0, 5, 6]) + // Third data point: negative side empty + // Result: [0+0, 0+5, 0+6, 1+0, 2+0] = [0, 5, 6, 1, 2] (with offset 1: [5, 6, 1, 2]) + d.Negative().BucketCounts().Append(5, 6, 1, 2) + d.Negative().SetOffset(1) + + return m + }, + in: func() pmetric.Metric { + m := pmetric.NewMetric() + m.SetEmptyExponentialHistogram() + return m + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -584,3 +622,36 @@ func testDataExpHistogramDouble() pmetric.ExponentialHistogramDataPointSlice { return dataWant } + +func testDataExpHistogramWithDifferentOffsets() pmetric.ExponentialHistogramDataPointSlice { + dataWant := pmetric.NewExponentialHistogramDataPointSlice() + + dWant := dataWant.AppendEmpty() + dWant.Attributes().PutStr("attr1", "val1") + dWant.SetCount(6) + dWant.SetZeroCount(2) + dWant.Positive().SetOffset(0) + dWant.Positive().BucketCounts().Append(1, 2) // [1, 2] + dWant.Negative().SetOffset(3) + 
dWant.Negative().BucketCounts().Append(1, 2) // [0, 0, 0, 1, 2] + + // Different offsets + dWant2 := dataWant.AppendEmpty() + dWant2.Attributes().PutStr("attr1", "val1") + dWant2.SetCount(5) + dWant2.SetZeroCount(1) + dWant2.Positive().SetOffset(1) + dWant2.Positive().BucketCounts().Append(3, 4) // [0, 3, 4] + dWant2.Negative().SetOffset(1) + dWant2.Negative().BucketCounts().Append(5, 6) // [0, 5, 6] + + // Large positive offset; negative side left empty + dWant3 := dataWant.AppendEmpty() + dWant3.Attributes().PutStr("attr1", "val1") + dWant3.SetCount(1) + dWant3.SetZeroCount(0) + dWant3.Positive().SetOffset(5) + dWant3.Positive().BucketCounts().Append(1) // [0, 0, 0, 0, 0, 1] + + return dataWant +}