Skip to content

Commit a8e444c

Browse files
committed
feat: Add health check for CephCluster CRD
Signed-off-by: Arnaud Farbos <[email protected]>
1 parent d7364b4 commit a8e444c

File tree

7 files changed

+187
-0
lines changed

7 files changed

+187
-0
lines changed
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
-- CRD documentation: https://rook.github.io/docs/rook/latest-release/CRDs/Cluster/ceph-cluster-crd/

-- Health result returned by this script. It starts out as "Progressing" with
-- an empty message and is refined by the checks further down.
local hs = {
  status = "Progressing",
  message = ""
}

--- Append a fragment to hs.message.
-- Fragments are joined with " - "; the first fragment is stored as-is.
-- nil and empty fragments are ignored so callers never produce dangling
-- separators (and a missing field cannot raise a concatenation error).
-- @param message string|nil fragment to append
local function append_to_message(message)
  if message == nil or message == "" then
    return
  end
  if hs.message ~= "" then
    hs.message = hs.message .. " - " .. message
  else
    hs.message = message
  end
end
16+
17+
-- Nothing can be evaluated until a status block exists on the resource.
if obj.status == nil then
  append_to_message("Waiting for status to be reported")
  return hs
end

-- Check the main Ceph health status first - https://github.com/ceph/ceph/blob/v20.3.0/src/include/health.h#L12
if obj.status.ceph ~= nil and obj.status.ceph.health ~= nil then
  local ceph_health = obj.status.ceph.health
  local details_message = ""

  -- Build details message from status.ceph.details if available
  local details = obj.status.ceph.details
  if type(details) == "table" then
    -- pairs() visits keys in an unspecified order, so collect the detail
    -- names and sort them: the resulting message must be identical on every
    -- evaluation of the same resource.
    local detail_types = {}
    for detail_type in pairs(details) do
      detail_types[#detail_types + 1] = detail_type
    end
    table.sort(detail_types, function(a, b)
      return tostring(a) < tostring(b)
    end)

    local detail_parts = {}
    for _, detail_type in ipairs(detail_types) do
      local detail_info = details[detail_type]
      -- Skip entries that are not tables or carry no message.
      if type(detail_info) == "table" and detail_info.message ~= nil then
        detail_parts[#detail_parts + 1] = detail_info.message
      end
    end
    if #detail_parts > 0 then
      details_message = " (" .. table.concat(detail_parts, "; ") .. ")"
    end
  end

  -- HEALTH_WARN and HEALTH_ERR both map to Degraded; any other value leaves
  -- the status unchanged.
  if ceph_health == "HEALTH_ERR" or ceph_health == "HEALTH_WARN" then
    hs.status = "Degraded"
  elseif ceph_health == "HEALTH_OK" then
    hs.status = "Healthy"
  end
  append_to_message("Ceph health is " .. ceph_health .. details_message)
end
47+
48+
-- Check state - https://github.com/rook/rook/blob/v1.17.7/pkg/apis/ceph.rook.io/v1/types.go#L621
-- The cluster state only refines the result when Ceph itself reported
-- HEALTH_OK; a Degraded or still-Progressing result is left untouched.
-- NOTE(review): this means state "Error" without a reported Ceph health stays
-- "Progressing" - confirm that is intended.
local state = obj.status.state
if state ~= nil and hs.status == "Healthy" then
  append_to_message("Ceph cluster state is " .. state)
  -- "Connected" is a cluster state (listed alongside "Created" in the
  -- ClusterState constants linked above), not a health status, so it is
  -- matched here rather than against hs.status.
  if state == "Created" or state == "Connected" then
    hs.status = "Healthy"
  elseif state == "Error" then
    hs.status = "Degraded"
  else
    -- Any remaining state (e.g. "Creating") is treated as still in progress.
    hs.status = "Progressing"
  end
end

-- Surface the free-form status message last.
if obj.status.message ~= nil then
  append_to_message(obj.status.message)
end

return hs
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
tests:
2+
- healthStatus:
3+
status: Healthy
4+
message: 'Ceph health is HEALTH_OK - Ceph cluster state is Created - Cluster created successfully'
5+
inputPath: testdata/healthy.yaml
6+
- healthStatus:
7+
status: Degraded
8+
message: 'Ceph health is HEALTH_WARN (4 osds down; 2 pools degraded) - Cluster has warnings'
9+
inputPath: testdata/degraded_warn.yaml
10+
- healthStatus:
11+
status: Degraded
12+
message: 'Ceph health is HEALTH_ERR (8 osds down) - Cluster has critical errors'
13+
inputPath: testdata/degraded_error.yaml
14+
- healthStatus:
15+
status: Progressing
16+
message: 'Cluster is being created'
17+
inputPath: testdata/state_creating.yaml
18+
- healthStatus:
19+
status: Progressing
20+
message: 'Waiting for status to be reported'
21+
inputPath: testdata/no_status.yaml
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
apiVersion: ceph.rook.io/v1
2+
kind: CephCluster
3+
metadata:
4+
name: test-ceph-cluster
5+
namespace: rook-ceph
6+
spec:
7+
cephVersion:
8+
image: quay.io/ceph/ceph:v19.2.0
9+
status:
10+
ceph:
11+
health: HEALTH_ERR
12+
details:
13+
OSD_DOWN:
14+
message: 8 osds down
15+
severity: HEALTH_ERR
16+
state: Error
17+
message: Cluster has critical errors
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
apiVersion: ceph.rook.io/v1
2+
kind: CephCluster
3+
metadata:
4+
name: test-ceph-cluster
5+
namespace: rook-ceph
6+
spec:
7+
cephVersion:
8+
image: quay.io/ceph/ceph:v19.2.0
9+
status:
10+
ceph:
11+
health: HEALTH_WARN
12+
details:
13+
OSD_DOWN:
14+
message: 4 osds down
15+
severity: HEALTH_WARN
16+
POOL_DEGRADED:
17+
message: 2 pools degraded
18+
severity: HEALTH_WARN
19+
state: Created
20+
message: Cluster has warnings
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
apiVersion: ceph.rook.io/v1
2+
kind: CephCluster
3+
metadata:
4+
name: test-ceph-cluster
5+
namespace: rook-ceph
6+
spec:
7+
cephVersion:
8+
image: quay.io/ceph/ceph:v19.2.0
9+
mon:
10+
count: 3
11+
mgr:
12+
count: 2
13+
storage:
14+
useAllNodes: true
15+
useAllDevices: false
16+
status:
17+
ceph:
18+
health: HEALTH_OK
19+
lastChecked: "2025-08-11T16:03:08Z"
20+
fsid: c121226d-cac9-492f-8b0b-c05693243380
21+
capacity:
22+
bytesAvailable: 35183103942656
23+
bytesTotal: 35184372088832
24+
bytesUsed: 1268146176
25+
lastUpdated: "2025-08-11T16:03:08Z"
26+
conditions:
27+
- lastHeartbeatTime: "2025-08-11T16:03:08Z"
28+
lastTransitionTime: "2025-08-11T16:03:08Z"
29+
message: Cluster created successfully
30+
reason: ClusterCreated
31+
status: "True"
32+
type: Ready
33+
- lastHeartbeatTime: "2025-08-11T16:03:08Z"
34+
lastTransitionTime: "2025-08-11T16:03:08Z"
35+
message: All OSDs are running
36+
reason: OSDsRunning
37+
status: "True"
38+
type: Progressing
39+
message: Cluster created successfully
40+
phase: Ready
41+
state: Created
42+
version:
43+
image: quay.io/ceph/ceph:v19.2.0
44+
version: 19.2.0-0
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
apiVersion: ceph.rook.io/v1
2+
kind: CephCluster
3+
metadata:
4+
name: test-ceph-cluster
5+
namespace: rook-ceph
6+
spec:
7+
cephVersion:
8+
image: quay.io/ceph/ceph:v19.2.0
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
apiVersion: ceph.rook.io/v1
2+
kind: CephCluster
3+
metadata:
4+
name: test-ceph-cluster
5+
namespace: rook-ceph
6+
spec:
7+
cephVersion:
8+
image: quay.io/ceph/ceph:v19.2.0
9+
status:
10+
state: Creating
11+
message: Cluster is being created

0 commit comments

Comments
 (0)