diff --git a/CHANGELOG.md b/CHANGELOG.md index a6ce6dc1114..8a1ffc23107 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -157,6 +157,7 @@ Internal types are updated to use `scope` instead of `instrumentation_library`. * [ENHANCEMENT] distributor: Log span names when `distributor.log_received_spans.include_all_attributes` is on [#1790](https://github.com/grafana/tempo/pull/1790) (@suraciii) * [ENHANCEMENT] metrics-generator: truncate label names and values exceeding a configurable length [#1897](https://github.com/grafana/tempo/pull/1897) (@kvrhdn) * [ENHANCEMENT] Add parquet WAL [#1878](https://github.com/grafana/tempo/pull/1878) (@joe-elliott, @mdisibio) +* [ENHANCEMENT] Convert the last few Jsonnet alerts to use per_cluster_label [#2000](https://github.com/grafana/tempo/pull/2000) (@Whyeasy) * [BUGFIX] Honor caching and buffering settings when finding traces by id [#1697](https://github.com/grafana/tempo/pull/1697) (@joe-elliott) * [BUGFIX] Correctly propagate errors from the iterator layer up through the queriers [#1723](https://github.com/grafana/tempo/pull/1723) (@joe-elliott) * [BUGFIX] Make multitenancy work with HTTP [#1781](https://github.com/grafana/tempo/pull/1781) (@gouthamve) diff --git a/operations/tempo-mixin/alerts.libsonnet b/operations/tempo-mixin/alerts.libsonnet index 124e6bc4d53..030f8b9c157 100644 --- a/operations/tempo-mixin/alerts.libsonnet +++ b/operations/tempo-mixin/alerts.libsonnet @@ -155,8 +155,8 @@ { alert: 'TempoBadOverrides', expr: ||| - sum(tempo_runtime_config_last_reload_successful{namespace=~"%s"} == 0) by (cluster, namespace, job) - ||| % $._config.namespace, + sum(tempo_runtime_config_last_reload_successful{namespace=~"%s"} == 0) by (%s) + ||| % [$._config.namespace, $._config.group_by_job], 'for': '15m', labels: { severity: 'warning', @@ -171,14 +171,14 @@ alert: 'TempoProvisioningTooManyWrites', // 30MB/s written to the WAL per ingester max expr: ||| - avg by (cluster, namespace) 
(rate(tempo_ingester_bytes_received_total{job=~".+/ingester"}[1m])) / 1024 / 1024 > 30 - |||, + avg by (%s) (rate(tempo_ingester_bytes_received_total{job=~".+/ingester"}[1m])) / 1024 / 1024 > 30 + ||| % $._config.group_by_cluster, 'for': '15m', labels: { severity: 'warning', }, annotations: { - message: 'Ingesters in {{ $labels.cluster }}/{{ $labels.namespace }} are receiving more data/second than desired, add more ingesters.', + message: 'Ingesters in {{ $labels.%s }}/{{ $labels.namespace }} are receiving more data/second than desired, add more ingesters.' % $._config.per_cluster_label, runbook_url: 'https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoProvisioningTooManyWrites', }, }, @@ -186,28 +186,28 @@ { alert: 'TempoCompactorsTooManyOutstandingBlocks', expr: ||| - sum by (cluster, namespace, tenant) (tempodb_compaction_outstanding_blocks{container="compactor", namespace=~"%s"}) / ignoring(tenant) group_left count(tempo_build_info{container="compactor", namespace=~"%s"}) by (cluster, namespace) > %d - ||| % [$._config.namespace, $._config.namespace, $._config.alerts.outstanding_blocks_warning], + sum by (%s) (tempodb_compaction_outstanding_blocks{container="compactor", namespace=~"%s"}) / ignoring(tenant) group_left count(tempo_build_info{container="compactor", namespace=~"%s"}) by (%s) > %d + ||| % [$._config.group_by_tenant, $._config.namespace, $._config.namespace, $._config.group_by_cluster, $._config.alerts.outstanding_blocks_warning], 'for': '6h', labels: { severity: 'warning', }, annotations: { - message: "There are too many outstanding compaction blocks in {{ $labels.cluster }}/{{ $labels.namespace }} for tenant {{ $labels.tenant }}, increase compactor's CPU or add more compactors.", + message: "There are too many outstanding compaction blocks in {{ $labels.%s }}/{{ $labels.namespace }} for tenant {{ $labels.tenant }}, increase compactor's CPU or add more compactors." 
% $._config.per_cluster_label, runbook_url: 'https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoCompactorsTooManyOutstandingBlocks', }, }, { alert: 'TempoCompactorsTooManyOutstandingBlocks', expr: ||| - sum by (cluster, namespace, tenant) (tempodb_compaction_outstanding_blocks{container="compactor", namespace=~"%s"}) / ignoring(tenant) group_left count(tempo_build_info{container="compactor", namespace=~"%s"}) by (cluster, namespace) > %d - ||| % [$._config.namespace, $._config.namespace, $._config.alerts.outstanding_blocks_critical], + sum by (%s) (tempodb_compaction_outstanding_blocks{container="compactor", namespace=~"%s"}) / ignoring(tenant) group_left count(tempo_build_info{container="compactor", namespace=~"%s"}) by (%s) > %d + ||| % [$._config.group_by_tenant, $._config.namespace, $._config.namespace, $._config.group_by_cluster, $._config.alerts.outstanding_blocks_critical], 'for': '24h', labels: { severity: 'critical', }, annotations: { - message: "There are too many outstanding compaction blocks in {{ $labels.cluster }}/{{ $labels.namespace }} for tenant {{ $labels.tenant }}, increase compactor's CPU or add more compactors.", + message: "There are too many outstanding compaction blocks in {{ $labels.%s }}/{{ $labels.namespace }} for tenant {{ $labels.tenant }}, increase compactor's CPU or add more compactors." 
% $._config.per_cluster_label, runbook_url: 'https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoCompactorsTooManyOutstandingBlocks', }, }, @@ -221,7 +221,7 @@ severity: 'critical', }, annotations: { - message: 'Tempo ingester has encountered errors while replaying a block on startup in {{ $labels.cluster }}/{{ $labels.namespace }} for tenant {{ $labels.tenant }}', + message: 'Tempo ingester has encountered errors while replaying a block on startup in {{ $labels.%s }}/{{ $labels.namespace }} for tenant {{ $labels.tenant }}' % $._config.per_cluster_label, runbook_url: 'https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoIngesterReplayErrors', }, },