Skip to content

Commit 76f81f8

Browse files
(manual census reporting) add CLI subcommand and API endpoint for census utilization
1 parent d551c75 commit 76f81f8

File tree

31 files changed

+1038
-534
lines changed

31 files changed

+1038
-534
lines changed

.changelog/22843.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
```release-note:enhancement
2+
cmd: Added a new subcommand `consul operator utilization [-today-only] [-message] [-y]` that generates a bundle containing a census utilization snapshot. The main flow is implemented in consul-enterprise.
3+
http: Added a new API handler for `/v1/operator/utilization`. The core functionality is to be implemented in consul-enterprise.
4+
agent: Census metrics collection is now always enabled, with a configurable option to export the metrics to HashiCorp Reporting.
5+
```

agent/agent.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1602,6 +1602,7 @@ func newConsulConfig(runtimeCfg *config.RuntimeConfig, logger hclog.Logger) (*co
16021602
cfg.Cloud = runtimeCfg.Cloud
16031603

16041604
cfg.Reporting.License.Enabled = runtimeCfg.Reporting.License.Enabled
1605+
cfg.Reporting.SnapshotRetentionTime = runtimeCfg.Reporting.SnapshotRetentionTime
16051606

16061607
cfg.ServerRejoinAgeMax = runtimeCfg.ServerRejoinAgeMax
16071608
cfg.EnableXDSLoadBalancing = runtimeCfg.EnableXDSLoadBalancing

agent/config/config.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1000,5 +1000,6 @@ type License struct {
10001000
}
10011001

10021002
type Reporting struct {
1003-
License License `mapstructure:"license"`
1003+
License License `mapstructure:"license"`
1004+
SnapshotRetentionTime *string `mapstructure:"snapshot_retention_time" json:"snapshot_retention_time,omitempty"`
10041005
}

agent/config/runtime.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1541,7 +1541,8 @@ type LicenseConfig struct {
15411541
}
15421542

15431543
type ReportingConfig struct {
1544-
License LicenseConfig
1544+
License LicenseConfig
1545+
SnapshotRetentionTime time.Duration
15451546
}
15461547

15471548
type AutoConfig struct {

agent/config/testdata/TestRuntimeConfig_Sanitize.golden

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,8 @@
308308
"Reporting": {
309309
"License": {
310310
"Enabled": false
311-
}
311+
},
312+
"SnapshotRetentionTime": "0s"
312313
},
313314
"RequestLimitsMode": 0,
314315
"RequestLimitsReadRate": 0,
@@ -525,4 +526,4 @@
525526
"Watches": [],
526527
"XDSUpdateRateLimit": 0,
527528
"EnableXDSLoadBalancing":true
528-
}
529+
}

agent/consul/config.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717

1818
"github.com/hashicorp/consul/agent/checks"
1919
consulrate "github.com/hashicorp/consul/agent/consul/rate"
20+
"github.com/hashicorp/consul/agent/consul/reporting"
2021
hcpconfig "github.com/hashicorp/consul/agent/hcp/config"
2122
"github.com/hashicorp/consul/agent/structs"
2223
"github.com/hashicorp/consul/internal/gossip/libserf"
@@ -597,6 +598,8 @@ func DefaultConfig() *Config {
597598
ServerRejoinAgeMax: 24 * 7 * time.Hour,
598599
}
599600

601+
conf.Reporting.SnapshotRetentionTime = reporting.DefaultSnapshotRetention
602+
600603
// Increase our reap interval to 3 days instead of 24h.
601604
conf.SerfLANConfig.ReconnectTimeout = 3 * 24 * time.Hour
602605
conf.SerfWANConfig.ReconnectTimeout = 3 * 24 * time.Hour
@@ -732,5 +735,6 @@ type License struct {
732735
}
733736

734737
type Reporting struct {
735-
License License
738+
License License
739+
SnapshotRetentionTime time.Duration
736740
}

agent/consul/leader.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -340,9 +340,8 @@ func (s *Server) establishLeadership(ctx context.Context) error {
340340
s.startLogVerification(ctx)
341341
}
342342

343-
if s.config.Reporting.License.Enabled && s.reportingManager != nil {
344-
s.reportingManager.StartReportingAgent()
345-
}
343+
// The reporting manager will start the metrics collection but will only report if config.reporting.license.enabled is true
344+
s.reportingManager.StartReportingAgent()
346345

347346
s.logger.Debug("successfully established leadership", "duration", time.Since(start))
348347
return nil

agent/consul/reporting/reporting.go

Lines changed: 39 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,45 +11,75 @@ import (
1111
"github.com/hashicorp/consul/agent/structs"
1212
"github.com/hashicorp/go-hclog"
1313
"github.com/hashicorp/go-memdb"
14+
"github.com/hashicorp/go-retryablehttp"
1415
)
1516

1617
type ReportingManager struct {
17-
logger hclog.Logger
18-
server ServerDelegate
19-
stateProvider StateDelegate
20-
tickerInterval time.Duration
18+
logger hclog.Logger
19+
clusterId string
20+
autoReporting bool
21+
server ServerDelegate
22+
stateProvider StateDelegate
23+
tickerInterval time.Duration
24+
manualSnapshotInterval time.Duration
25+
snapshotRetention time.Duration
26+
customerID string
2127
EntDeps
2228
sync.RWMutex
29+
manualHTTPClient *retryablehttp.Client
30+
manualServiceAddress string
2331
}
2432

2533
const (
2634
SystemMetadataReportingProcessID = "reporting-process-id"
2735
ReportingInterval = 1 * time.Hour
2836
)
2937

38+
const (
39+
// ManualSnapshotInterval controls how often we persist manual census snapshots.
40+
ManualSnapshotInterval = 24 * time.Hour
41+
// DefaultSnapshotRetention is the default retention period for manual census snapshots.
42+
DefaultSnapshotRetention = 9600 * time.Hour // 400 days
43+
)
44+
3045
//go:generate mockery --name ServerDelegate --inpackage
3146
type ServerDelegate interface {
3247
GetSystemMetadata(key string) (string, error)
3348
SetSystemMetadataKey(key, val string) error
3449
IsLeader() bool
50+
ApplyCensusRequest(req *structs.CensusRequest) error
3551
}
3652

3753
type StateDelegate interface {
54+
// Metrics methods
3855
NodeUsage() (uint64, state.NodeUsage, error)
3956
ServiceUsage(ws memdb.WatchSet, tenantUsage bool) (uint64, structs.ServiceUsage, error)
57+
// Census methods
58+
CensusPut(idx uint64, req *structs.CensusRequest) error
59+
CensusPrune(idx uint64, cutoff time.Time) (int, error)
60+
CensusListAll() (uint64, []*state.CensusSnapshot, error)
4061
}
4162

42-
func NewReportingManager(logger hclog.Logger, deps EntDeps, server ServerDelegate, stateProvider StateDelegate) *ReportingManager {
63+
func NewReportingManager(logger hclog.Logger, clusterId string, autoReporting bool, deps EntDeps, server ServerDelegate, stateProvider StateDelegate, snapshotRetention time.Duration) *ReportingManager {
64+
if snapshotRetention <= 0 {
65+
snapshotRetention = DefaultSnapshotRetention
66+
}
67+
4368
rm := &ReportingManager{
44-
logger: logger.Named("reporting"),
45-
server: server,
46-
stateProvider: stateProvider,
47-
tickerInterval: ReportingInterval,
69+
logger: logger.Named("reporting"),
70+
clusterId: clusterId,
71+
autoReporting: autoReporting,
72+
server: server,
73+
stateProvider: stateProvider,
74+
tickerInterval: ReportingInterval,
75+
manualSnapshotInterval: ManualSnapshotInterval,
76+
snapshotRetention: snapshotRetention,
4877
}
4978
err := rm.initEnterpriseReporting(deps)
5079
if err != nil {
5180
rm.logger.Error("Error initializing reporting manager", "error", err)
5281
return nil
5382
}
83+
rm.logger.Debug("Created reporting manager")
5484
return rm
5585
}

agent/consul/reporting/reporting_ce.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ func (rm *ReportingManager) StopReportingAgent() error {
2626
return nil
2727
}
2828

29-
func (m *ReportingManager) Run(ctx context.Context) {
29+
func (rm *ReportingManager) RunMetricsWriter(ctx context.Context) {
30+
// no op
31+
}
32+
33+
func (rm *ReportingManager) RunManualSnapshotWriter(ctx context.Context) {
3034
// no op
3135
}

0 commit comments

Comments
 (0)