Skip to content

Commit 23c4329

Browse files
awsecscontainermetrics: update CpuUtilized metric calculation (#1283)
After internal discussion, we decided to report the `CpuUtilized` metric as a percentage. We studied the new calculation and verified it with a domain expert. [Update] At the reviewer's suggestion, the following change will be made in a separate PR. Earlier we decided to add task and container metadata as resource attributes. However, the CloudWatch `awsemf` exporter reads dimensions only from metric labels. As this exporter is a major target for the `awsecscontainermetrics` receiver, we need to add this metadata as metric labels. I discussed this in our OTel Metric SIG meeting. We also discussed it internally and agreed to add the metadata as metric labels as well. This change will address both of these issues. **Link to tracking Issue:** #1282
1 parent 9349e4c commit 23c4329

File tree

8 files changed

+59
-15
lines changed

8 files changed

+59
-15
lines changed

receiver/awsecscontainermetricsreceiver/awsecscontainermetrics/accumulator.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ func (acc *metricDataAccumulator) getMetricsData(containerStatsMap map[string]Co
4040
containerMetrics.MemoryReserved = *containerMetadata.Limits.Memory
4141
containerMetrics.CPUReserved = *containerMetadata.Limits.CPU
4242

43+
if containerMetrics.CPUReserved > 0 {
44+
containerMetrics.CPUUtilized = (containerMetrics.CPUUtilized / containerMetrics.CPUReserved)
45+
}
46+
4347
containerResource := containerResource(containerMetadata)
4448
for k, v := range taskResource.Labels {
4549
containerResource.Labels[k] = v
@@ -65,6 +69,12 @@ func (acc *metricDataAccumulator) getMetricsData(containerStatsMap map[string]Co
6569
taskMetrics.CPUReserved = *metadata.Limits.CPU
6670
}
6771

72+
// taskMetrics.CPUReserved cannot be zero. In ECS, user needs to set CPU limit
73+
// at least in one place (either in task level or in container level). If the
74+
// task level CPULimit is not present, we calculate it from the summation of
75+
// all container CPU limits.
76+
taskMetrics.CPUUtilized = ((taskMetrics.CPUUsageInVCPU / taskMetrics.CPUReserved) * 100)
77+
6878
acc.accumulate(
6979
taskResource,
7080
convertToOCMetrics(TaskPrefix, taskMetrics, nil, nil, timestamp),

receiver/awsecscontainermetricsreceiver/awsecscontainermetrics/constant.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ const (
4848
AttributeCPUOnlines = "cpu.onlines"
4949
AttributeCPUReserved = "cpu.reserved"
5050
AttributeCPUUtilized = "cpu.utilized"
51+
AttributeCPUUsageInVCPU = "cpu.usage.vcpu"
5152

5253
AttributeNetworkRateRx = "network.rate.rx"
5354
AttributeNetworkRateTx = "network.rate.tx"
@@ -70,4 +71,5 @@ const (
7071
UnitBytesPerSec = "Bytes/Sec"
7172
UnitCount = "Count"
7273
UnitVCpu = "vCPU"
74+
UnitPercent = "Percent"
7375
)

receiver/awsecscontainermetricsreceiver/awsecscontainermetrics/docker_stats.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,21 @@
1414

1515
package awsecscontainermetrics
1616

17+
import "time"
18+
1719
// ContainerStats defines the structure for container stats
1820
type ContainerStats struct {
19-
Name string `json:"name"`
20-
ID string `json:"id"`
21+
Name string `json:"name"`
22+
ID string `json:"id"`
23+
Read time.Time `json:"read"`
24+
PreviousRead time.Time `json:"preread"`
2125

2226
Memory MemoryStats `json:"memory_stats,omitempty"`
2327
Disk DiskStats `json:"blkio_stats,omitempty"`
2428
Network map[string]NetworkStats `json:"networks,omitempty"`
2529
NetworkRate NetworkRateStats `json:"network_rate_stats,omitempty"`
2630
CPU CPUStats `json:"cpu_stats,omitempty"`
31+
PreviousCPU CPUStats `json:"precpu_stats,omitempty"`
2732
}
2833

2934
// MemoryStats defines the memory stats

receiver/awsecscontainermetricsreceiver/awsecscontainermetrics/ecs_metrics.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ type ECSMetrics struct {
3030
NumOfCPUCores uint64
3131
CPUReserved float64
3232
CPUUtilized float64
33+
CPUUsageInVCPU float64
3334

3435
NetworkRateRxBytesPerSecond float64
3536
NetworkRateTxBytesPerSecond float64

receiver/awsecscontainermetricsreceiver/awsecscontainermetrics/metrics_helper.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,15 @@ package awsecscontainermetrics
1717
// getContainerMetrics generate ECS Container metrics from Container stats
1818
func getContainerMetrics(stats ContainerStats) ECSMetrics {
1919
memoryUtilizedInMb := (*stats.Memory.Usage - stats.Memory.Stats["cache"]) / BytesInMiB
20-
2120
numOfCores := (uint64)(len(stats.CPU.CPUUsage.PerCPUUsage))
21+
timeDiffSinceLastRead := (float64)(stats.Read.Sub(stats.PreviousRead).Nanoseconds())
2222

23-
// TODO: match with ECS Agent calculation and modify if needed
24-
cpuUtilized := (float64)(*stats.CPU.CPUUsage.TotalUsage / numOfCores / 1024)
23+
cpuUsageInVCpu := float64(0)
24+
if timeDiffSinceLastRead > 0 {
25+
cpuDelta := (float64)(*stats.CPU.CPUUsage.TotalUsage - *stats.PreviousCPU.CPUUsage.TotalUsage)
26+
cpuUsageInVCpu = cpuDelta / timeDiffSinceLastRead
27+
}
28+
cpuUtilized := cpuUsageInVCpu * 100
2529

2630
netStatArray := getNetworkStats(stats.Network)
2731

@@ -41,6 +45,7 @@ func getContainerMetrics(stats ContainerStats) ECSMetrics {
4145
m.NumOfCPUCores = numOfCores
4246
m.CPUOnlineCpus = *stats.CPU.OnlineCpus
4347
m.SystemCPUUsage = *stats.CPU.SystemCPUUsage
48+
m.CPUUsageInVCPU = cpuUsageInVCpu
4449
m.CPUUtilized = cpuUtilized
4550

4651
if stats.NetworkRate == (NetworkRateStats{}) {
@@ -119,8 +124,8 @@ func aggregateTaskMetrics(taskMetrics *ECSMetrics, conMetrics ECSMetrics) {
119124
taskMetrics.NumOfCPUCores += conMetrics.NumOfCPUCores
120125
taskMetrics.CPUOnlineCpus += conMetrics.CPUOnlineCpus
121126
taskMetrics.SystemCPUUsage += conMetrics.SystemCPUUsage
122-
taskMetrics.CPUUtilized += conMetrics.CPUUtilized
123127
taskMetrics.CPUReserved += conMetrics.CPUReserved
128+
taskMetrics.CPUUsageInVCPU += conMetrics.CPUUsageInVCPU
124129

125130
taskMetrics.NetworkRateRxBytesPerSecond += conMetrics.NetworkRateRxBytesPerSecond
126131
taskMetrics.NetworkRateTxBytesPerSecond += conMetrics.NetworkRateTxBytesPerSecond

receiver/awsecscontainermetricsreceiver/awsecscontainermetrics/metrics_helper_test.go

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ package awsecscontainermetrics
1616

1717
import (
1818
"testing"
19+
"time"
1920

2021
"github.com/stretchr/testify/require"
2122
)
@@ -77,14 +78,33 @@ func TestGetContainerAndTaskMetrics(t *testing.T) {
7778
CPUUtilized: &v,
7879
CPUReserved: &v,
7980
}
81+
82+
previousCPUUsage := CPUUsage{
83+
TotalUsage: &v,
84+
UsageInKernelmode: &v,
85+
UsageInUserMode: &v,
86+
PerCPUUsage: percpu,
87+
}
88+
89+
previousCPUStats := CPUStats{
90+
CPUUsage: previousCPUUsage,
91+
OnlineCpus: &v,
92+
SystemCPUUsage: &v,
93+
CPUUtilized: &v,
94+
CPUReserved: &v,
95+
}
96+
8097
containerStats := ContainerStats{
81-
Name: "test",
82-
ID: "001",
83-
Memory: mem,
84-
Disk: disk,
85-
Network: net,
86-
NetworkRate: netRate,
87-
CPU: cpuStats,
98+
Name: "test",
99+
ID: "001",
100+
Read: time.Now(),
101+
PreviousRead: time.Now().Add(-10 * time.Second),
102+
Memory: mem,
103+
Disk: disk,
104+
Network: net,
105+
NetworkRate: netRate,
106+
CPU: cpuStats,
107+
PreviousCPU: previousCPUStats,
88108
}
89109

90110
containerMetrics := getContainerMetrics(containerStats)

receiver/awsecscontainermetricsreceiver/awsecscontainermetrics/translator.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,9 @@ func convertToOCMetrics(prefix string, m ECSMetrics, labelKeys []*metricspb.Labe
3434
intGauge(prefix+AttributeCPUCores, UnitCount, &m.NumOfCPUCores, labelKeys, labelValues),
3535
intGauge(prefix+AttributeCPUOnlines, UnitCount, &m.CPUOnlineCpus, labelKeys, labelValues),
3636
intCumulative(prefix+AttributeCPUSystemUsage, UnitNanoSecond, &m.SystemCPUUsage, labelKeys, labelValues),
37-
doubleGauge(prefix+AttributeCPUUtilized, UnitVCpu, &m.CPUUtilized, labelKeys, labelValues),
37+
doubleGauge(prefix+AttributeCPUUtilized, UnitPercent, &m.CPUUtilized, labelKeys, labelValues),
3838
doubleGauge(prefix+AttributeCPUReserved, UnitVCpu, &m.CPUReserved, labelKeys, labelValues),
39+
doubleGauge(prefix+AttributeCPUUsageInVCPU, UnitVCpu, &m.CPUUsageInVCPU, labelKeys, labelValues),
3940

4041
doubleGauge(prefix+AttributeNetworkRateRx, UnitBytesPerSec, &m.NetworkRateRxBytesPerSecond, labelKeys, labelValues),
4142
doubleGauge(prefix+AttributeNetworkRateTx, UnitBytesPerSec, &m.NetworkRateTxBytesPerSecond, labelKeys, labelValues),

receiver/awsecscontainermetricsreceiver/awsecscontainermetrics/translator_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ func TestConvertToOTMetrics(t *testing.T) {
3939
}
4040

4141
metrics := convertToOCMetrics("container.", m, labelKeys, labelValues, timestamp)
42-
require.EqualValues(t, 25, len(metrics))
42+
require.EqualValues(t, 26, len(metrics))
4343
}
4444

4545
func TestIntGauge(t *testing.T) {

0 commit comments

Comments
 (0)