Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/pve_exporter/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ def main():
clusterflags.add_argument('--collector.version', dest='collector_version',
action=BooleanOptionalAction, default=True,
help='Exposes PVE version info')
clusterflags.add_argument('--collector.subscription', dest='collector_subscription',
action=BooleanOptionalAction, default=True,
help='Exposes PVE subscription info')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This block should be moved to nodeflags (further down).

clusterflags.add_argument('--collector.node', dest='collector_node',
action=BooleanOptionalAction, default=True,
help='Exposes PVE node info')
Expand Down Expand Up @@ -70,6 +73,7 @@ def main():
collectors = CollectorsOptions(
status=params.collector_status,
version=params.collector_version,
subscription=params.collector_subscription,
node=params.collector_node,
cluster=params.collector_cluster,
resources=params.collector_resources,
Expand Down
6 changes: 5 additions & 1 deletion src/pve_exporter/collector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@
)
from pve_exporter.collector.node import (
NodeConfigCollector,
NodeReplicationCollector
NodeReplicationCollector,
SubscriptionCollector
)

CollectorsOptions = collections.namedtuple('CollectorsOptions', [
'status',
'version',
'subscription',
'node',
'cluster',
'resources',
Expand All @@ -46,6 +48,8 @@ def collect_pve(config, host, cluster, node, options: CollectorsOptions):
registry.register(ClusterInfoCollector(pve))
if cluster and options.version:
registry.register(VersionCollector(pve))
if cluster and options.subscription:
registry.register(SubscriptionCollector(pve))
if node and options.config:
registry.register(NodeConfigCollector(pve))
if node and options.replication:
Expand Down
49 changes: 49 additions & 0 deletions src/pve_exporter/collector/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import logging
import itertools
from datetime import datetime

from prometheus_client.core import GaugeMetricFamily

Expand Down Expand Up @@ -126,3 +127,51 @@ def collect(self): # pylint: disable=missing-docstring
metrics[key].add_metric(label_values, metric_value)

return itertools.chain(metrics.values(), info_metrics.values())

class SubscriptionCollector:
"""
Collects Proxmox VE subscription information (node, subscription level, status, next due date).
"""

def __init__(self, pve):
self._pve = pve

def collect(self): # pylint: disable=missing-docstring
info_metric = GaugeMetricFamily(
"pve_subscription_info",
"Proxmox VE subscription info (1 if present)",
labels=["node", "level", "status"],
)
Comment on lines 140 to 151
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you want to be able to alert on the subscription status, then it shouldn't be a label on an *_info metric. Take a look at the pve_ha_state and pve_lock_state metrics (#302 and #303). With that metric design, I can add an alert which triggers if pve_lock_state != 0 remains for more than, e.g. 5 minutes. And I can have more relaxed alerts for pve_lock_state{state="backup"} != 0 (because backups can take longer).

It looks like the subscription status is an enum with the following options: new, notfound, active, invalid, expired, suspended.

Thus, the metrics could maybe look more like this:

pve_subscription_status{id="node/proxmox",status="new"} 0.0
pve_subscription_status{id="node/proxmox",status="notfound"} 0.0
pve_subscription_status{id="node/proxmox",status="active"} 1.0
pve_subscription_status{id="node/proxmox",status="invalid"} 0.0
pve_subscription_status{id="node/proxmox",status="expired"} 0.0
pve_subscription_status{id="node/proxmox",status="suspended"} 0.0

Alerting could then be done on pve_subscription_status{status!="active"} != 0


next_due_metric = GaugeMetricFamily(
"pve_subscription_next_due_timestamp",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As per the Prometheus metric naming recommendations, this should end in the unit (i.e. with a _seconds suffix).

pve_subscription_next_due_timestamp_seconds

"Subscription next due date as Unix timestamp",
labels=["node", "level"],
)

node = None
for entry in self._pve.cluster.status.get():
if entry['type'] == 'node' and entry['local']:
node = entry['name']
break

subscription = self._pve.nodes(node).subscription.get()

level = subscription.get("level", "unknown")
status = subscription.get("status", "unknown")

info_metric.add_metric(
[node, level, status],
1,
)

next_due_date = subscription.get("nextduedate")
if next_due_date:
timestamp = datetime.strptime(next_due_date, "%Y-%m-%d").timestamp()
next_due_metric.add_metric(
[node, level],
timestamp,
)

yield info_metric
yield next_due_metric