Skip to content

Commit e5404a5

Browse files
committed
Enhance ArgoCD deployment failure handler with detailed resource status and troubleshooting commands
1 parent 05018c6 commit e5404a5

File tree

2 files changed

+124
-5
lines changed

2 files changed

+124
-5
lines changed

.github/workflows/argocd-deployment-failure.yml

Lines changed: 123 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@ jobs:
1313
runs-on: ubuntu-latest
1414

1515
steps:
16+
- name: Install kubectl and get cluster info
17+
run: |
18+
# kubectl is expected to be preinstalled on ubuntu-latest; this step only verifies the client binary and does not contact a cluster
19+
kubectl version --client
20+
1621
- name: Create GitHub Issue
1722
uses: actions/github-script@v7
1823
with:
@@ -28,11 +33,105 @@ jobs:
2833
const repoUrl = payload.repo_url || '';
2934
const targetRevision = payload.target_revision || '';
3035
const timestamp = payload.timestamp || new Date().toISOString();
36+
const degradedResources = payload.degraded_resources || [];
3137
32-
// Extract cluster name from URL (e.g., https://kubernetes.default.svc -> "in-cluster")
38+
// Ask kubectl for the current context and API server address; if kubectl fails, fall back to the cluster URL from the payload
39+
const { execSync } = require('child_process');
3340
let clusterName = 'in-cluster';
34-
if (clusterUrl && clusterUrl !== 'unknown' && !clusterUrl.includes('kubernetes.default.svc')) {
35-
clusterName = clusterUrl.replace(/^https?:\/\//, '').split(':')[0];
41+
let clusterContext = 'unknown';
42+
let degradedDetails = '';
43+
44+
try {
45+
// Get actual cluster context name
46+
clusterContext = execSync('kubectl config current-context', { encoding: 'utf-8' }).trim();
47+
48+
// Get cluster info
49+
const clusterInfo = execSync('kubectl cluster-info', { encoding: 'utf-8' }).trim();
50+
const clusterMatch = clusterInfo.match(/Kubernetes control plane is running at (.+)/);
51+
if (clusterMatch) {
52+
clusterName = clusterMatch[1];
53+
}
54+
55+
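// Get pods in the namespace whose phase is neither Running nor Succeeded (pods still in the Running phase are not matched by this selector)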
56+
try {
57+
const pods = execSync(`kubectl get pods -n ${namespace} --field-selector=status.phase!=Running,status.phase!=Succeeded -o json`, { encoding: 'utf-8' });
58+
const podsData = JSON.parse(pods);
59+
60+
if (podsData.items && podsData.items.length > 0) {
61+
degradedDetails = '\n### 🔴 Degraded Resources\n\n';
62+
degradedDetails += '#### Pods with Issues\n\n';
63+
64+
for (const pod of podsData.items) {
65+
const podName = pod.metadata.name;
66+
const podPhase = pod.status.phase;
67+
const containerStatuses = pod.status.containerStatuses || [];
68+
69+
degradedDetails += `**Pod:** \`${podName}\`\n`;
70+
degradedDetails += `- **Status:** ${podPhase}\n`;
71+
72+
// Check container statuses
73+
for (const container of containerStatuses) {
74+
if (!container.ready) {
75+
degradedDetails += `- **Container:** \`${container.name}\`\n`;
76+
degradedDetails += ` - Ready: ${container.ready}\n`;
77+
degradedDetails += ` - Restart Count: ${container.restartCount}\n`;
78+
79+
if (container.state.waiting) {
80+
degradedDetails += ` - State: Waiting\n`;
81+
degradedDetails += ` - Reason: ${container.state.waiting.reason}\n`;
82+
degradedDetails += ` - Message: ${container.state.waiting.message || 'N/A'}\n`;
83+
} else if (container.state.terminated) {
84+
degradedDetails += ` - State: Terminated\n`;
85+
degradedDetails += ` - Reason: ${container.state.terminated.reason}\n`;
86+
degradedDetails += ` - Exit Code: ${container.state.terminated.exitCode}\n`;
87+
degradedDetails += ` - Message: ${container.state.terminated.message || 'N/A'}\n`;
88+
}
89+
}
90+
}
91+
degradedDetails += '\n';
92+
}
93+
}
94+
95+
// Get all deployments in the namespace; the ones with unready or unavailable replicas are selected below
96+
const deployments = execSync(`kubectl get deployments -n ${namespace} -o json`, { encoding: 'utf-8' });
97+
const deploymentsData = JSON.parse(deployments);
98+
99+
if (deploymentsData.items && deploymentsData.items.length > 0) {
100+
const failedDeployments = deploymentsData.items.filter(d =>
101+
d.status.replicas !== d.status.readyReplicas ||
102+
d.status.unavailableReplicas > 0
103+
);
104+
105+
if (failedDeployments.length > 0) {
106+
degradedDetails += '#### Deployments with Issues\n\n';
107+
for (const deploy of failedDeployments) {
108+
degradedDetails += `**Deployment:** \`${deploy.metadata.name}\`\n`;
109+
degradedDetails += `- Desired Replicas: ${deploy.status.replicas || 0}\n`;
110+
degradedDetails += `- Ready Replicas: ${deploy.status.readyReplicas || 0}\n`;
111+
degradedDetails += `- Unavailable Replicas: ${deploy.status.unavailableReplicas || 0}\n`;
112+
113+
if (deploy.status.conditions) {
114+
const failedCondition = deploy.status.conditions.find(c => c.status === 'False');
115+
if (failedCondition) {
116+
degradedDetails += `- Condition: ${failedCondition.type}\n`;
117+
degradedDetails += `- Reason: ${failedCondition.reason}\n`;
118+
degradedDetails += `- Message: ${failedCondition.message}\n`;
119+
}
120+
}
121+
degradedDetails += '\n';
122+
}
123+
}
124+
}
125+
126+
} catch (e) {
127+
console.error('Error getting degraded resources:', e.message);
128+
degradedDetails = '\n### ⚠️ Unable to retrieve degraded resource details\n\n' + e.message + '\n';
129+
}
130+
131+
} catch (e) {
132+
console.error('Error getting cluster info:', e.message);
133+
clusterName = clusterUrl;
134+
clusterContext = 'Unable to retrieve';
36135
}
37136
38137
const issueTitle = `🚨 ArgoCD Deployment Failed: ${appName}`;
@@ -46,7 +145,8 @@ jobs:
46145
47146
| Field | Value |
48147
|-------|-------|
49-
| Cluster Name | \`${clusterName}\` |
148+
| Cluster Context | \`${clusterContext}\` |
149+
| Cluster API Server | \`${clusterName}\` |
50150
| Cluster URL | \`${clusterUrl}\` |
51151
| Namespace | \`${namespace}\` |
52152
@@ -65,6 +165,25 @@ jobs:
65165
\`\`\`
66166
${message}
67167
\`\`\`
168+
${degradedDetails}
169+
### Troubleshooting Commands
170+
171+
\`\`\`bash
172+
# Check application status in ArgoCD
173+
argocd app get ${appName}
174+
175+
# Check pods in namespace
176+
kubectl get pods -n ${namespace}
177+
178+
# Describe failed pods
179+
kubectl describe pods -n ${namespace}
180+
181+
# Get pod logs
182+
kubectl logs -n ${namespace} <pod-name>
183+
184+
# Check events
185+
kubectl get events -n ${namespace} --sort-by='.lastTimestamp'
186+
\`\`\`
68187
69188
### Quick Links
70189

Act-3/argocd-test-app.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
apiVersion: argoproj.io/v1alpha1
22
kind: Application
33
metadata:
4-
name: notworking-aks-store-test
4+
name: another-kind-of-working-aks-store
55
namespace: argocd
66
annotations:
77
# Enable notifications on sync failures

0 commit comments

Comments
 (0)