Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ Internal types are updated to use `scope` instead of `instrumentation_library`.
* [ENHANCEMENT] distributor: Log span names when `distributor.log_received_spans.include_all_attributes` is on [#1790](https://github.com/grafana/tempo/pull/1790) (@suraciii)
* [ENHANCEMENT] metrics-generator: truncate label names and values exceeding a configurable length [#1897](https://github.com/grafana/tempo/pull/1897) (@kvrhdn)
* [ENHANCEMENT] Add parquet WAL [#1878](https://github.com/grafana/tempo/pull/1878) (@joe-elliott, @mdisibio)
* [ENHANCEMENT] Convert the last few Jsonnet alerts to use `per_cluster_label` [#2000](https://github.com/grafana/tempo/pull/2000) (@Whyeasy)
* [BUGFIX] Honor caching and buffering settings when finding traces by id [#1697](https://github.com/grafana/tempo/pull/1697) (@joe-elliott)
* [BUGFIX] Correctly propagate errors from the iterator layer up through the queriers [#1723](https://github.com/grafana/tempo/pull/1723) (@joe-elliott)
* [BUGFIX] Make multitenancy work with HTTP [#1781](https://github.com/grafana/tempo/pull/1781) (@gouthamve)
Expand Down
24 changes: 12 additions & 12 deletions operations/tempo-mixin/alerts.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,8 @@
{
alert: 'TempoBadOverrides',
expr: |||
sum(tempo_runtime_config_last_reload_successful{namespace=~"%s"} == 0) by (cluster, namespace, job)
||| % $._config.namespace,
sum(tempo_runtime_config_last_reload_successful{namespace=~"%s"} == 0) by (%s)
||| % [$._config.namespace, $._config.group_by_job],
'for': '15m',
labels: {
severity: 'warning',
Expand All @@ -171,43 +171,43 @@
alert: 'TempoProvisioningTooManyWrites',
// 30MB/s written to the WAL per ingester max
expr: |||
avg by (cluster, namespace) (rate(tempo_ingester_bytes_received_total{job=~".+/ingester"}[1m])) / 1024 / 1024 > 30
|||,
avg by (%s) (rate(tempo_ingester_bytes_received_total{job=~".+/ingester"}[1m])) / 1024 / 1024 > 30
||| % $._config.group_by_cluster,
'for': '15m',
labels: {
severity: 'warning',
},
annotations: {
message: 'Ingesters in {{ $labels.cluster }}/{{ $labels.namespace }} are receiving more data/second than desired, add more ingesters.',
message: 'Ingesters in {{ $labels.%s }}/{{ $labels.namespace }} are receiving more data/second than desired, add more ingesters.' % $._config.per_cluster_label,
runbook_url: 'https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoProvisioningTooManyWrites',
},
},
// compactors
{
alert: 'TempoCompactorsTooManyOutstandingBlocks',
expr: |||
sum by (cluster, namespace, tenant) (tempodb_compaction_outstanding_blocks{container="compactor", namespace=~"%s"}) / ignoring(tenant) group_left count(tempo_build_info{container="compactor", namespace=~"%s"}) by (cluster, namespace) > %d
||| % [$._config.namespace, $._config.namespace, $._config.alerts.outstanding_blocks_warning],
sum by (%s) (tempodb_compaction_outstanding_blocks{container="compactor", namespace=~"%s"}) / ignoring(tenant) group_left count(tempo_build_info{container="compactor", namespace=~"%s"}) by (%s) > %d
||| % [$._config.group_by_tenant, $._config.namespace, $._config.namespace, $._config.group_by_cluster, $._config.alerts.outstanding_blocks_warning],
'for': '6h',
labels: {
severity: 'warning',
},
annotations: {
message: "There are too many outstanding compaction blocks in {{ $labels.cluster }}/{{ $labels.namespace }} for tenant {{ $labels.tenant }}, increase compactor's CPU or add more compactors.",
message: "There are too many outstanding compaction blocks in {{ $labels.%s }}/{{ $labels.namespace }} for tenant {{ $labels.tenant }}, increase compactor's CPU or add more compactors." % $._config.per_cluster_label,
runbook_url: 'https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoCompactorsTooManyOutstandingBlocks',
},
},
{
alert: 'TempoCompactorsTooManyOutstandingBlocks',
expr: |||
sum by (cluster, namespace, tenant) (tempodb_compaction_outstanding_blocks{container="compactor", namespace=~"%s"}) / ignoring(tenant) group_left count(tempo_build_info{container="compactor", namespace=~"%s"}) by (cluster, namespace) > %d
||| % [$._config.namespace, $._config.namespace, $._config.alerts.outstanding_blocks_critical],
sum by (%s) (tempodb_compaction_outstanding_blocks{container="compactor", namespace=~"%s"}) / ignoring(tenant) group_left count(tempo_build_info{container="compactor", namespace=~"%s"}) by (%s) > %d
||| % [$._config.group_by_tenant, $._config.namespace, $._config.namespace, $._config.group_by_cluster, $._config.alerts.outstanding_blocks_critical],
'for': '24h',
labels: {
severity: 'critical',
},
annotations: {
message: "There are too many outstanding compaction blocks in {{ $labels.cluster }}/{{ $labels.namespace }} for tenant {{ $labels.tenant }}, increase compactor's CPU or add more compactors.",
message: "There are too many outstanding compaction blocks in {{ $labels.%s }}/{{ $labels.namespace }} for tenant {{ $labels.tenant }}, increase compactor's CPU or add more compactors." % $._config.per_cluster_label,
runbook_url: 'https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoCompactorsTooManyOutstandingBlocks',
},
},
Expand All @@ -221,7 +221,7 @@
severity: 'critical',
},
annotations: {
message: 'Tempo ingester has encountered errors while replaying a block on startup in {{ $labels.cluster }}/{{ $labels.namespace }} for tenant {{ $labels.tenant }}',
message: 'Tempo ingester has encountered errors while replaying a block on startup in {{ $labels.%s }}/{{ $labels.namespace }} for tenant {{ $labels.tenant }}' % $._config.per_cluster_label,
runbook_url: 'https://github.com/grafana/tempo/tree/main/operations/tempo-mixin/runbook.md#TempoIngesterReplayErrors',
},
},
Expand Down