argocd-sync-failed #12
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: ArgoCD Deployment Failure Handler

# Runs when a repository_dispatch event of type `argocd-sync-failed` is
# received. The event's client_payload describes the failed application; the
# job turns it into a GitHub issue, or a comment on the already-open issue for
# the same application.
on:
  repository_dispatch:
    types: [argocd-sync-failed]

permissions:
  issues: write
  contents: read

jobs:
  create-issue:
    runs-on: ubuntu-latest
    steps:
      - name: Install kubectl and get cluster info
        run: |
          # kubectl should already be available in ubuntu-latest.
          # This only prints the client version. The workflow as written does
          # not configure a kubeconfig, so the cluster lookups in the next step
          # are best-effort: when kubectl cannot reach a cluster the issue falls
          # back to the values carried in the dispatch payload.
          kubectl version --client

      - name: Create GitHub Issue
        uses: actions/github-script@v7
        with:
          script: |
            const { execFileSync } = require('child_process');

            // ---------------------------------------------------------------
            // Payload
            // ---------------------------------------------------------------
            // Every field is optional. Missing or empty values get a
            // placeholder, and everything is coerced to a string so later
            // string operations cannot throw on unexpected payload types.
            const payload = context.payload.client_payload || {};
            const field = (value, fallback) =>
              value === undefined || value === null || value === '' ? fallback : String(value);

            const appName = field(payload.app_name, 'unknown');
            const clusterUrl = field(payload.cluster, 'unknown');
            const namespace = field(payload.namespace, 'default');
            const healthStatus = field(payload.health_status, 'unknown');
            const syncStatus = field(payload.sync_status, 'unknown');
            const message = field(payload.message, 'No error message available');
            const revision = field(payload.revision, 'unknown');
            const repoUrl = field(payload.repo_url, '');
            const targetRevision = field(payload.target_revision, '');
            const timestamp = field(payload.timestamp, new Date().toISOString());
            // Optional override for the ArgoCD UI base URL; the default keeps
            // the previously hard-coded address.
            const argocdUrl = field(payload.argocd_url, 'https://localhost:8080').replace(/\/+$/, '');

            // The namespace comes from the dispatch payload, so treat it as
            // untrusted: only a valid DNS-1123 label is ever passed to kubectl
            // or printed inside copy-pasteable commands.
            const NAMESPACE_PATTERN = /^[a-z0-9]([-a-z0-9]{0,61}[a-z0-9])?$/;
            const namespaceIsValid = NAMESPACE_PATTERN.test(namespace);
            const namespaceForCommands = namespaceIsValid ? namespace : '<namespace>';

            // ---------------------------------------------------------------
            // Helpers
            // ---------------------------------------------------------------

            // Runs kubectl with an argument vector (no shell, so payload values
            // cannot inject commands) and returns its stdout as text.
            const kubectl = (args) =>
              execFileSync('kubectl', args, {
                encoding: 'utf-8',
                timeout: 30000, // do not let an unreachable cluster stall the job
                maxBuffer: 64 * 1024 * 1024, // JSON listings can exceed the 1 MiB default
              });

            // Removes ANSI colour sequences. NOTE(review): `kubectl cluster-info`
            // appears to colour its output even when piped - confirm; stripping
            // is harmless if it does not.
            const stripAnsi = (text) => text.replace(/\u001b\[[0-9;]*m/g, '');

            // Wraps text in a Markdown code fence that is longer than any run of
            // backticks inside the text, so the text cannot close the fence.
            const fenced = (text, lang = '') => {
              const runs = text.match(/`+/g) || [];
              const longest = runs.reduce((max, run) => Math.max(max, run.length), 0);
              const fence = '`'.repeat(Math.max(3, longest + 1));
              return `${fence}${lang}\n${text}\n${fence}`;
            };

            // Renders a value as inline code, or N/A when the value is empty.
            const code = (value) => (value ? `\`${value}\`` : 'N/A');

            // A pod needs attention when it is not finished and is either not
            // Running or Running with a container that is not ready (for
            // example CrashLoopBackOff, which still reports phase Running).
            const isUnhealthyPod = (pod) => {
              const status = pod.status || {};
              if (status.phase === 'Succeeded') return false;
              if (status.phase !== 'Running') return true;
              return (status.containerStatuses || []).some((c) => !c.ready);
            };

            // Returns the Markdown section for unhealthy pods in `ns`, or ''.
            const describePods = (ns) => {
              const listing = JSON.parse(kubectl(['get', 'pods', '-n', ns, '-o', 'json']));
              const unhealthy = (listing.items || []).filter(isUnhealthyPod);
              if (unhealthy.length === 0) return '';

              const lines = ['#### Pods with Issues', ''];
              for (const pod of unhealthy) {
                const status = pod.status || {};
                lines.push(`**Pod:** \`${pod.metadata.name}\``);
                lines.push(`- **Status:** ${status.phase}`);
                for (const container of status.containerStatuses || []) {
                  if (container.ready) continue;
                  const state = container.state || {};
                  lines.push(`- **Container:** \`${container.name}\``);
                  lines.push(`  - Ready: ${container.ready}`);
                  lines.push(`  - Restart Count: ${container.restartCount}`);
                  if (state.waiting) {
                    lines.push('  - State: Waiting');
                    lines.push(`  - Reason: ${state.waiting.reason}`);
                    lines.push(`  - Message: ${state.waiting.message || 'N/A'}`);
                  } else if (state.terminated) {
                    lines.push('  - State: Terminated');
                    lines.push(`  - Reason: ${state.terminated.reason}`);
                    lines.push(`  - Exit Code: ${state.terminated.exitCode}`);
                    lines.push(`  - Message: ${state.terminated.message || 'N/A'}`);
                  }
                }
                lines.push('');
              }
              return `${lines.join('\n')}\n`;
            };

            // Returns the Markdown section for deployments in `ns` that have
            // fewer ready replicas than desired or report unavailable replicas,
            // or '' when every deployment is healthy.
            const describeDeployments = (ns) => {
              const listing = JSON.parse(kubectl(['get', 'deployments', '-n', ns, '-o', 'json']));
              const summaries = (listing.items || []).map((deploy) => {
                const status = deploy.status || {};
                const spec = deploy.spec || {};
                return {
                  name: deploy.metadata.name,
                  // Prefer the declared replica count; status.replicas only
                  // reports what currently exists.
                  desired: typeof spec.replicas === 'number' ? spec.replicas : status.replicas || 0,
                  ready: status.readyReplicas || 0,
                  unavailable: status.unavailableReplicas || 0,
                  failedCondition: (status.conditions || []).find((c) => c.status === 'False'),
                };
              });
              const failing = summaries.filter((d) => d.ready < d.desired || d.unavailable > 0);
              if (failing.length === 0) return '';

              const lines = ['#### Deployments with Issues', ''];
              for (const deploy of failing) {
                lines.push(`**Deployment:** \`${deploy.name}\``);
                lines.push(`- Desired Replicas: ${deploy.desired}`);
                lines.push(`- Ready Replicas: ${deploy.ready}`);
                lines.push(`- Unavailable Replicas: ${deploy.unavailable}`);
                if (deploy.failedCondition) {
                  lines.push(`- Condition: ${deploy.failedCondition.type}`);
                  lines.push(`- Reason: ${deploy.failedCondition.reason}`);
                  lines.push(`- Message: ${deploy.failedCondition.message}`);
                }
                lines.push('');
              }
              return `${lines.join('\n')}\n`;
            };

            // Combines the pod and deployment sections under one header; the
            // header is emitted whenever either section has content.
            const collectDegradedDetails = (ns) => {
              const sections = [describePods(ns), describeDeployments(ns)].filter(Boolean);
              return sections.length > 0
                ? `\n### 🔴 Degraded Resources\n\n${sections.join('')}`
                : '';
            };

            // ---------------------------------------------------------------
            // Best-effort cluster diagnostics
            // ---------------------------------------------------------------
            let clusterName = 'in-cluster';
            let clusterContext = 'unknown';
            let degradedDetails = '';

            try {
              clusterContext = kubectl(['config', 'current-context']).trim();

              const clusterInfo = stripAnsi(kubectl(['cluster-info']));
              const clusterMatch = clusterInfo.match(/Kubernetes control plane is running at (\S+)/);
              if (clusterMatch) {
                clusterName = clusterMatch[1];
              }

              if (!namespaceIsValid) {
                degradedDetails =
                  '\n### ⚠️ Unable to retrieve degraded resource details\n\n' +
                  'The namespace in the payload is not a valid Kubernetes namespace name.\n';
              } else {
                // A failure here must not discard the cluster info gathered above.
                try {
                  degradedDetails = collectDegradedDetails(namespace);
                } catch (e) {
                  console.error('Error getting degraded resources:', e.message);
                  degradedDetails = '\n### ⚠️ Unable to retrieve degraded resource details\n\n' + e.message + '\n';
                }
              }
            } catch (e) {
              // No reachable cluster: report what the payload told us instead.
              console.error('Error getting cluster info:', e.message);
              clusterName = clusterUrl;
              clusterContext = 'Unable to retrieve';
            }

            // ---------------------------------------------------------------
            // Issue content
            // ---------------------------------------------------------------
            // Built from an array of lines so the Markdown does not depend on
            // how the YAML block scalar happens to be indented.
            const issueTitle = `🚨 ArgoCD Deployment Failed: ${appName}`;

            const troubleshootingCommands = [
              '# Check application status in ArgoCD',
              `argocd app get ${appName}`,
              '# Check pods in namespace',
              `kubectl get pods -n ${namespaceForCommands}`,
              '# Describe failed pods',
              `kubectl describe pods -n ${namespaceForCommands}`,
              '# Get pod logs',
              `kubectl logs -n ${namespaceForCommands} <pod-name>`,
              '# Check events',
              `kubectl get events -n ${namespaceForCommands} --sort-by='.lastTimestamp'`,
            ].join('\n');

            const quickLinks = [`- [ArgoCD UI](${argocdUrl}/applications/${appName})`];
            if (repoUrl) {
              quickLinks.push(`- [Source Repository](${repoUrl})`);
            }

            const issueBody = [
              '## ArgoCD Deployment Failure',
              '',
              `**Application:** \`${appName}\``,
              `**Timestamp:** ${timestamp}`,
              '',
              '### Cluster Information',
              '',
              '| Field | Value |',
              '|-------|-------|',
              `| Cluster Context | ${code(clusterContext)} |`,
              `| Cluster API Server | ${code(clusterName)} |`,
              `| Cluster URL | ${code(clusterUrl)} |`,
              `| Namespace | ${code(namespace)} |`,
              '',
              '### Application Status',
              '',
              '| Field | Value |',
              '|-------|-------|',
              `| Health Status | ${code(healthStatus)} |`,
              `| Sync Status | ${code(syncStatus)} |`,
              `| Revision | ${code(revision)} |`,
              `| Target Revision | ${code(targetRevision)} |`,
              `| Repository | ${repoUrl || 'N/A'} |`,
              '',
              '### Error Message',
              '',
              fenced(message),
              degradedDetails,
              '### Troubleshooting Commands',
              '',
              fenced(troubleshootingCommands, 'bash'),
              '',
              '### Quick Links',
              '',
              ...quickLinks,
              '',
              '---',
              '*This issue was automatically created by ArgoCD Notifications*',
              '',
            ].join('\n');

            // ---------------------------------------------------------------
            // Create the issue, or comment on the open one for this application
            // ---------------------------------------------------------------
            // Paginate so more than 100 open labelled issues are still searched.
            const openIssues = await github.paginate(github.rest.issues.listForRepo, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              state: 'open',
              labels: 'argocd-deployment-failure',
              per_page: 100,
            });

            // Exact title match: a substring test would let app "api" hijack the
            // issue for "api-gateway". The listing can also contain pull
            // requests, which are skipped.
            const duplicateIssue = openIssues.find(
              (issue) => !issue.pull_request && issue.title === issueTitle
            );

            if (duplicateIssue) {
              // Add comment to existing issue instead of creating new one
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: duplicateIssue.number,
                body: [
                  '### 🔄 Deployment Failed Again',
                  '',
                  `**Timestamp:** ${timestamp}`,
                  `**Revision:** \`${revision}\``,
                  '',
                  '**Error:**',
                  fenced(message),
                ].join('\n'),
              });
              console.log(`Updated existing issue #${duplicateIssue.number}`);
            } else {
              // Create new issue
              const issue = await github.rest.issues.create({
                owner: context.repo.owner,
                repo: context.repo.repo,
                title: issueTitle,
                body: issueBody,
                labels: ['argocd-deployment-failure', 'automated', 'bug'],
              });
              console.log(`Created issue #${issue.data.number}`);
            }