Skip to content

Commit af3a22c

Browse files
committed
feat: expire stale advanced metrics after not being updated for some time
Signed-off-by: Matthew McKeen <[email protected]>
1 parent 18da0e9 commit af3a22c

25 files changed

+541
-59
lines changed

crd/api/v1alpha1/metricsconfiguration_types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ type MetricsContextOptions struct {
4141
// +optional
4242
// +listType=set
4343
AdditionalLabels []string `json:"additionalLabels,omitempty"`
44+
// TTL represents the time-to-live of the metrics collected
45+
// Metrics which have not been updated within the TTL will be removed from export
46+
// +optional
47+
TTL string `json:"ttl,omitempty"`
4448
}
4549

4650
// MetricsNamespaces indicates the namespaces to include or exclude in metric collection

crd/api/v1alpha1/validations/validate_metricconfiguration.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ package validations
77

88
import (
99
"fmt"
10+
"time"
1011

1112
"github.com/microsoft/retina/crd/api/v1alpha1"
1213
"github.com/microsoft/retina/pkg/utils"
@@ -40,6 +41,15 @@ func MetricsSpec(metricsSpec v1alpha1.MetricsSpec) error {
4041
if !utils.IsAdvancedMetric(contextOption.MetricName) {
4142
return fmt.Errorf("%s is not a valid metric", contextOption.MetricName)
4243
}
44+
if contextOption.TTL != "" {
45+
ttl, err := time.ParseDuration(contextOption.TTL)
46+
if err != nil {
47+
return fmt.Errorf("invalid TTL format for metric %s: %v", contextOption.MetricName, err)
48+
}
49+
if ttl < 0 {
50+
return fmt.Errorf("TTL cannot be negative for metric %s", contextOption.MetricName)
51+
}
52+
}
4353
}
4454

4555
err := MetricsNamespaces(metricsSpec.Namespaces)
@@ -152,10 +162,13 @@ func MetricsContextOptionsCompare(old, new []v1alpha1.MetricsContextOptions) boo
152162
return false
153163
}
154164

155-
if !utils.CompareStringSlice(oldContextOption.AdditionalLabels, newContextOption.AdditionalLabels) {
165+
if oldContextOption.TTL != newContextOption.TTL {
156166
return false
157167
}
158168

169+
if !utils.CompareStringSlice(oldContextOption.AdditionalLabels, newContextOption.AdditionalLabels) {
170+
return false
171+
}
159172
}
160173

161174
return true

crd/api/v1alpha1/validations/validate_metricconfiguration_test.go

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,46 @@ func TestMetricsConfiguration(t *testing.T) {
9898
},
9999
wantErr: false,
100100
},
101+
{
102+
name: "valid metrics crd with TTL",
103+
obj: &v1alpha1.MetricsConfiguration{
104+
ObjectMeta: metav1.ObjectMeta{
105+
Name: "metricsconfig",
106+
},
107+
Spec: v1alpha1.MetricsSpec{
108+
ContextOptions: []v1alpha1.MetricsContextOptions{
109+
{
110+
MetricName: "drop_count",
111+
TTL: "24h",
112+
},
113+
},
114+
Namespaces: v1alpha1.MetricsNamespaces{
115+
Exclude: []string{"kube-system"},
116+
},
117+
},
118+
},
119+
wantErr: false,
120+
},
121+
{
122+
name: "invalid metrics crd with TTL",
123+
obj: &v1alpha1.MetricsConfiguration{
124+
ObjectMeta: metav1.ObjectMeta{
125+
Name: "metricsconfig",
126+
},
127+
Spec: v1alpha1.MetricsSpec{
128+
ContextOptions: []v1alpha1.MetricsContextOptions{
129+
{
130+
MetricName: "drop_count",
131+
TTL: "24",
132+
},
133+
},
134+
Namespaces: v1alpha1.MetricsNamespaces{
135+
Exclude: []string{"kube-system"},
136+
},
137+
},
138+
},
139+
wantErr: true,
140+
},
101141
{
102142
name: "invalid metrics crd with random metric name",
103143
obj: &v1alpha1.MetricsConfiguration{
@@ -348,6 +388,125 @@ func TestCompare(t *testing.T) {
348388
},
349389
equal: true,
350390
},
391+
{
392+
name: "valid test 6",
393+
old: &v1alpha1.MetricsConfiguration{
394+
ObjectMeta: metav1.ObjectMeta{
395+
Name: "metricsconfig",
396+
},
397+
Spec: v1alpha1.MetricsSpec{
398+
ContextOptions: []v1alpha1.MetricsContextOptions{
399+
{
400+
MetricName: "drop_count",
401+
SourceLabels: []string{"ns", "ip", "port"},
402+
TTL: "24h",
403+
},
404+
},
405+
Namespaces: v1alpha1.MetricsNamespaces{
406+
Include: []string{"default", "test"},
407+
Exclude: []string{"kube-system"},
408+
},
409+
},
410+
},
411+
new: &v1alpha1.MetricsConfiguration{
412+
ObjectMeta: metav1.ObjectMeta{
413+
Name: "metricsconfig",
414+
},
415+
Spec: v1alpha1.MetricsSpec{
416+
ContextOptions: []v1alpha1.MetricsContextOptions{
417+
{
418+
MetricName: "drop_count",
419+
SourceLabels: []string{"ip", "port", "ns"},
420+
},
421+
},
422+
Namespaces: v1alpha1.MetricsNamespaces{
423+
Include: []string{"default", "test"},
424+
Exclude: []string{"kube-system"},
425+
},
426+
},
427+
},
428+
equal: false,
429+
},
430+
{
431+
name: "valid test 7",
432+
old: &v1alpha1.MetricsConfiguration{
433+
ObjectMeta: metav1.ObjectMeta{
434+
Name: "metricsconfig",
435+
},
436+
Spec: v1alpha1.MetricsSpec{
437+
ContextOptions: []v1alpha1.MetricsContextOptions{
438+
{
439+
MetricName: "drop_count",
440+
SourceLabels: []string{"ns", "ip", "port"},
441+
TTL: "24h",
442+
},
443+
},
444+
Namespaces: v1alpha1.MetricsNamespaces{
445+
Include: []string{"default", "test"},
446+
Exclude: []string{"kube-system"},
447+
},
448+
},
449+
},
450+
new: &v1alpha1.MetricsConfiguration{
451+
ObjectMeta: metav1.ObjectMeta{
452+
Name: "metricsconfig",
453+
},
454+
Spec: v1alpha1.MetricsSpec{
455+
ContextOptions: []v1alpha1.MetricsContextOptions{
456+
{
457+
MetricName: "drop_count",
458+
SourceLabels: []string{"ip", "port", "ns"},
459+
TTL: "24h",
460+
},
461+
},
462+
Namespaces: v1alpha1.MetricsNamespaces{
463+
Include: []string{"default", "test"},
464+
Exclude: []string{"kube-system"},
465+
},
466+
},
467+
},
468+
equal: true,
469+
},
470+
{
471+
name: "valid test 8",
472+
old: &v1alpha1.MetricsConfiguration{
473+
ObjectMeta: metav1.ObjectMeta{
474+
Name: "metricsconfig",
475+
},
476+
Spec: v1alpha1.MetricsSpec{
477+
ContextOptions: []v1alpha1.MetricsContextOptions{
478+
{
479+
MetricName: "drop_count",
480+
SourceLabels: []string{"ns", "ip", "port"},
481+
TTL: "24h",
482+
},
483+
},
484+
Namespaces: v1alpha1.MetricsNamespaces{
485+
Include: []string{"default", "test"},
486+
Exclude: []string{"kube-system"},
487+
},
488+
},
489+
},
490+
new: &v1alpha1.MetricsConfiguration{
491+
ObjectMeta: metav1.ObjectMeta{
492+
Name: "metricsconfig",
493+
},
494+
Spec: v1alpha1.MetricsSpec{
495+
ContextOptions: []v1alpha1.MetricsContextOptions{
496+
{
497+
MetricName: "drop_count",
498+
SourceLabels: []string{"ip", "port", "ns"},
499+
TTL: "12h",
500+
},
501+
},
502+
Namespaces: v1alpha1.MetricsNamespaces{
503+
Include: []string{"default", "test"},
504+
Exclude: []string{"kube-system"},
505+
},
506+
},
507+
},
508+
equal: false,
509+
},
351510
}
352511

353512
for _, tt := range tests {

crd/api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

deploy/standard/manifests/controller/helm/retina/crds/retina.sh_captures.yaml

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
33
kind: CustomResourceDefinition
44
metadata:
55
annotations:
6-
controller-gen.kubebuilder.io/version: v0.15.0
6+
controller-gen.kubebuilder.io/version: v0.16.5
77
name: captures.retina.sh
88
spec:
99
group: retina.sh
@@ -53,6 +53,14 @@ spec:
5353
should continue for.
5454
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
5555
type: string
56+
interfaces:
57+
description: |-
58+
Interfaces specifies the network interfaces on which to capture packets.
59+
If specified, captures only on the listed interfaces.
60+
If empty, captures on all interfaces by default.
61+
items:
62+
type: string
63+
type: array
5664
maxCaptureSize:
5765
default: 100
5866
description: MaxCaptureSize limits the capture file to MB
@@ -290,10 +298,14 @@ spec:
290298
description: SecretName is the name of secret which stores
291299
S3 compliant storage access key and secret key.
292300
type: string
301+
required:
302+
- bucket
303+
- secretName
293304
type: object
294305
type: object
295306
required:
296307
- captureConfiguration
308+
- outputConfiguration
297309
type: object
298310
status:
299311
description: CaptureStatus describes the status of the capture.
@@ -310,16 +322,8 @@ spec:
310322
type: string
311323
conditions:
312324
items:
313-
description: "Condition contains details for one aspect of the current
314-
state of this API Resource.\n---\nThis struct is intended for
315-
direct use as an array at the field path .status.conditions. For
316-
example,\n\n\n\ttype FooStatus struct{\n\t // Represents the
317-
observations of a foo's current state.\n\t // Known .status.conditions.type
318-
are: \"Available\", \"Progressing\", and \"Degraded\"\n\t //
319-
+patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t
320-
\ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\"
321-
patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
322-
\ // other fields\n\t}"
325+
description: Condition contains details for one aspect of the current
326+
state of this API Resource.
323327
properties:
324328
lastTransitionTime:
325329
description: |-
@@ -360,12 +364,7 @@ spec:
360364
- Unknown
361365
type: string
362366
type:
363-
description: |-
364-
type of condition in CamelCase or in foo.example.com/CamelCase.
365-
---
366-
Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
367-
useful (see .node.status.conditions), the ability to deconflict is important.
368-
The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
367+
description: type of condition in CamelCase or in foo.example.com/CamelCase.
369368
maxLength: 316
370369
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
371370
type: string

deploy/standard/manifests/controller/helm/retina/crds/retina.sh_metricsconfigurations.yaml

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
33
kind: CustomResourceDefinition
44
metadata:
55
annotations:
6-
controller-gen.kubebuilder.io/version: v0.15.0
6+
controller-gen.kubebuilder.io/version: v0.16.5
77
name: metricsconfigurations.retina.sh
88
spec:
99
group: retina.sh
@@ -75,6 +75,11 @@ spec:
7575
type: string
7676
type: array
7777
x-kubernetes-list-type: set
78+
ttl:
79+
description: |-
80+
TTL represents the time-to-live of the metrics collected
81+
Metrics which have not been updated within the TTL will be removed from export
82+
type: string
7883
required:
7984
- metricName
8085
type: object
@@ -136,6 +141,11 @@ spec:
136141
type: string
137142
type: array
138143
x-kubernetes-list-type: set
144+
ttl:
145+
description: |-
146+
TTL represents the time-to-live of the metrics collected
147+
Metrics which have not been updated within the TTL will be removed from export
148+
type: string
139149
required:
140150
- metricName
141151
type: object

deploy/standard/manifests/controller/helm/retina/crds/retina.sh_retinaendpoints.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
33
kind: CustomResourceDefinition
44
metadata:
55
annotations:
6-
controller-gen.kubebuilder.io/version: v0.15.0
6+
controller-gen.kubebuilder.io/version: v0.16.5
77
name: retinaendpoints.retina.sh
88
spec:
99
group: retina.sh

deploy/standard/manifests/controller/helm/retina/crds/retina.sh_tracesconfigurations.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
33
kind: CustomResourceDefinition
44
metadata:
55
annotations:
6-
controller-gen.kubebuilder.io/version: v0.15.0
6+
controller-gen.kubebuilder.io/version: v0.16.5
77
name: tracesconfigurations.retina.sh
88
spec:
99
group: retina.sh

docs/03-Metrics/configuration.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ You can enable/disable metrics by including/omitting their Plugin from `enabledP
55
Via [MetricsConfiguration CRD](../05-Concepts/CRDs/MetricsConfiguration.md), you can further customize the following for your enabled plugins:
66

77
- Which metrics to include
8-
- Which metadata to include for a metric.
8+
- Which metadata to include for a metric
9+
- Time-to-live for a metric
910

1011
**Note**: If you enable [Annotations](./annotations.md), you cannot use the `MetricsConfiguration` CRD to specify which Pods to observe.

docs/05-Concepts/CRDs/MetricsConfiguration.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ The `MetricsConfiguration` CRD is defined with the following specifications:
2424
- `destinationLabels`: Represents the destination context labels, such as IP, Pod, port, workload (deployment/replicaset/statefulset/daemonset).
2525
- `metricName`: Indicates the name of the metric.
2626
- `sourceLabels`: Represents the source context labels, such as IP, Pod, port.
27+
- `ttl`: Represents the time-to-live for the metric. If a particular set of context labels receives no metric updates for this duration, the metric for those labels is removed from export. The value of `ttl` must be a non-negative Go duration string that `time.ParseDuration` accepts (for example, `24h`). A zero `ttl` (the default) means that metrics are never removed from export.
2728

2829
- **spec.namespaces:** Specifies the namespaces to include or exclude in metric collection. It includes the following properties:
2930
- `exclude`: Specifies namespaces to be excluded from metric collection.
@@ -51,6 +52,7 @@ spec:
5152
- port
5253
additionalLabels:
5354
- direction
55+
ttl: 24h
5456
- metricName: forward_count
5557
sourceLabels:
5658
- ip

0 commit comments

Comments
 (0)