Add auto-unquarantine tests agent and workflow #1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow identity and triggers.
name: Auto-Unquarantine Tests

on:
  # Scheduled trigger: every Monday at 2:00 AM UTC.
  schedule:
    - cron: '0 2 * * 1'

  # Manual trigger with optional overrides.
  workflow_dispatch:
    inputs:
      dry_run:
        type: boolean
        default: false
        required: false
        description: 'Dry run mode - analyze but do not create PR'
      min_runs_threshold:
        type: string
        default: '90'
        required: false
        description: 'Minimum number of test runs required per OS to consider the test stable'
# All runs share one concurrency group so two runs never execute at the same time;
# a run that is already in progress is left to finish rather than being cancelled.
concurrency:
  cancel-in-progress: false
  group: auto-unquarantine-tests
# Token scopes granted to GITHUB_TOKEN for this workflow.
permissions:
  pull-requests: write  # the job opens a pull request
  issues: write         # the job lists issues and adds labels through the issues API
  contents: write       # the job pushes a new branch
jobs:
  # Single job: reads the open 'quarantined-test' issues, picks tests whose quarantine
  # report shows zero failures and enough runs on linux, macos and windows, removes
  # their quarantine with tools/QuarantineTools, builds the touched test projects and
  # opens a pull request. With DRY_RUN=true only the analysis is performed.
  auto_unquarantine:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    env:
      GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      # Fall back to the dispatch defaults when no input value is present.
      MIN_RUNS_THRESHOLD: ${{ inputs.min_runs_threshold || '90' }}
      DRY_RUN: ${{ inputs.dry_run || 'false' }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup .NET
        uses: actions/setup-dotnet@v4
        with:
          global-json-file: global.json

      # Writes quarantined_issues.json (number/title/body per issue) and exposes
      # the number of issues found as the `issue_count` step output.
      - name: Fetch quarantined test issues
        id: fetch-issues
        run: |
          echo "Fetching open issues with 'quarantined-test' label..."
          gh issue list \
            --repo dotnet/aspire \
            --label "quarantined-test" \
            --state open \
            --json number,title,body \
            --limit 100 > quarantined_issues.json

          ISSUE_COUNT=$(jq length quarantined_issues.json)
          echo "Found $ISSUE_COUNT quarantined test issues"
          echo "issue_count=$ISSUE_COUNT" >> "$GITHUB_OUTPUT"

          # Display summary
          jq -r '.[] | "#\(.number): \(.title)"' quarantined_issues.json | head -10

      # Writes stable_tests.json: one entry per issue whose report shows 0 failures
      # and at least MIN_RUNS_THRESHOLD runs on every OS. Exposes `stable_count`.
      - name: Analyze issues and identify stable tests
        id: analyze
        run: |
          echo "Analyzing issues for stable tests..."

          # Create a temporary script to parse issue bodies
          cat > analyze_issues.sh << 'ANALYZE_SCRIPT'
          #!/bin/bash
          set -e

          MIN_RUNS=$1
          ISSUES_FILE=$2
          OUTPUT_FILE=$3

          # Reject a non-numeric threshold up front: `[ a -lt b ]` fails with an error
          # on bad operands, the `if` below would read that as "false", and every
          # zero-failure test would be reported as stable.
          if ! [[ "$MIN_RUNS" =~ ^[0-9]+$ ]]; then
            echo "::error::min_runs_threshold must be a non-negative integer, got '$MIN_RUNS'"
            exit 1
          fi

          echo "Minimum runs threshold: $MIN_RUNS"

          # Prints "<failures> <runs>" for one OS name ($1) found in an issue body ($2).
          # A field is left empty when the body carries no value for it.
          os_stats() {
            local os=$1 body=$2 failures runs
            # Primary format, e.g. "linux (0/100)"
            failures=$(echo "$body" | grep -oP "$os"'[^(]*\(\K\d+(?=/\d+\))' | head -1)
            runs=$(echo "$body" | grep -oP "$os"'[^(]*\(\d+/\K\d+(?=\))' | head -1)
            # Alternative: check for "No failures ✅" format
            if [ -z "$failures" ] && echo "$body" | grep -q "$os.*No failures"; then
              failures=0
              runs=$(echo "$body" | grep -oP "$os"'.*?(\d+)\s+runs?' | grep -oP '\d+' | head -1)
            fi
            echo "$failures $runs"
          }

          # Initialize output
          echo "[]" > "$OUTPUT_FILE"

          # Process each issue. The loop runs in a pipeline subshell, which is fine
          # here because results are accumulated in $OUTPUT_FILE, not in variables.
          jq -c '.[]' "$ISSUES_FILE" | while read -r issue; do
            ISSUE_NUM=$(echo "$issue" | jq -r '.number')
            ISSUE_TITLE=$(echo "$issue" | jq -r '.title')
            ISSUE_BODY=$(echo "$issue" | jq -r '.body // ""')

            echo "Analyzing issue #$ISSUE_NUM: $ISSUE_TITLE"

            # Check if issue body contains quarantine report
            if ! echo "$ISSUE_BODY" | grep -q "Report for Quarantined runs"; then
              echo " ⏭️ No quarantine report found, skipping"
              continue
            fi

            # Extract failure and run counts for each OS
            read -r LINUX_FAILURES LINUX_RUNS <<< "$(os_stats linux "$ISSUE_BODY")"
            read -r MACOS_FAILURES MACOS_RUNS <<< "$(os_stats macos "$ISSUE_BODY")"
            read -r WINDOWS_FAILURES WINDOWS_RUNS <<< "$(os_stats windows "$ISSUE_BODY")"

            # Debug output
            echo " Linux: $LINUX_FAILURES/$LINUX_RUNS"
            echo " macOS: $MACOS_FAILURES/$MACOS_RUNS"
            echo " Windows: $WINDOWS_FAILURES/$WINDOWS_RUNS"

            # Check if all OSes have data
            if [ -z "$LINUX_FAILURES" ] || [ -z "$LINUX_RUNS" ] || \
               [ -z "$MACOS_FAILURES" ] || [ -z "$MACOS_RUNS" ] || \
               [ -z "$WINDOWS_FAILURES" ] || [ -z "$WINDOWS_RUNS" ]; then
              echo " ⏭️ Missing data for one or more platforms, skipping"
              continue
            fi

            # Check if all failures are 0
            if [ "$LINUX_FAILURES" != "0" ] || \
               [ "$MACOS_FAILURES" != "0" ] || \
               [ "$WINDOWS_FAILURES" != "0" ]; then
              echo " ⏭️ Has failures, skipping"
              continue
            fi

            # Check if all runs meet threshold
            if [ "$LINUX_RUNS" -lt "$MIN_RUNS" ] || \
               [ "$MACOS_RUNS" -lt "$MIN_RUNS" ] || \
               [ "$WINDOWS_RUNS" -lt "$MIN_RUNS" ]; then
              echo " ⏭️ Insufficient runs (need $MIN_RUNS per OS), skipping"
              continue
            fi

            # Extract test method name from title
            # Common patterns: "Failing test: Method", "[Failing test]: Method", "Quarantine: Method"
            TEST_METHOD=$(echo "$ISSUE_TITLE" | grep -oP '(?:Failing test:|Flaky test:|\[Failing test\]:|\[Flaky test\]:|Quarantine:)\s*\K.+' | xargs)
            if [ -z "$TEST_METHOD" ]; then
              # No known prefix: fall back to the last space-separated word of the title
              TEST_METHOD=$(echo "$ISSUE_TITLE" | rev | cut -d' ' -f1 | rev)
            fi

            echo " ✅ Test is stable: $TEST_METHOD"
            echo " Will unquarantine from issue #$ISSUE_NUM"

            # Add to results
            TEMP_RESULT=$(jq -n \
              --arg issue "$ISSUE_NUM" \
              --arg title "$ISSUE_TITLE" \
              --arg method "$TEST_METHOD" \
              --arg linux "$LINUX_RUNS" \
              --arg macos "$MACOS_RUNS" \
              --arg windows "$WINDOWS_RUNS" \
              '{issue: $issue, title: $title, method: $method, linux_runs: $linux, macos_runs: $macos, windows_runs: $windows}')
            jq --argjson new "$TEMP_RESULT" '. += [$new]' "$OUTPUT_FILE" > "${OUTPUT_FILE}.tmp"
            mv "${OUTPUT_FILE}.tmp" "$OUTPUT_FILE"
          done

          STABLE_COUNT=$(jq length "$OUTPUT_FILE")
          echo "Found $STABLE_COUNT stable tests eligible for unquarantining"
          ANALYZE_SCRIPT

          chmod +x analyze_issues.sh
          ./analyze_issues.sh "$MIN_RUNS_THRESHOLD" quarantined_issues.json stable_tests.json

          # Output results
          STABLE_COUNT=$(jq length stable_tests.json)
          echo "stable_count=$STABLE_COUNT" >> "$GITHUB_OUTPUT"

          if [ "$STABLE_COUNT" -gt "0" ]; then
            echo "Stable tests found:"
            jq -r '.[] | " #\(.issue): \(.method) (\(.linux_runs)/\(.macos_runs)/\(.windows_runs) runs)"' stable_tests.json
          else
            echo "No stable tests found that meet all criteria"
          fi

      # Runs QuarantineTools for every entry in stable_tests.json. Exposes
      # tests_processed / tests_success / tests_failed and writes failed_tests.txt.
      - name: Search and unquarantine tests
        id: unquarantine
        if: steps.analyze.outputs.stable_count > 0 && env.DRY_RUN == 'false'
        run: |
          echo "Unquarantining stable tests..."

          TESTS_PROCESSED=0
          TESTS_SUCCESS=0
          TESTS_FAILED=0
          FAILED_TESTS=""

          # The loop is fed through process substitution rather than a pipe: a piped
          # `while` runs in a subshell, and the counters updated inside it would be
          # back at 0 after `done`.
          while read -r test; do
            ISSUE_NUM=$(echo "$test" | jq -r '.issue')
            TEST_METHOD=$(echo "$test" | jq -r '.method')

            echo "Processing test: $TEST_METHOD (from issue #$ISSUE_NUM)"
            TESTS_PROCESSED=$((TESTS_PROCESSED + 1))

            # Search for fully-qualified test method name if needed
            if [[ ! "$TEST_METHOD" =~ \. ]]; then
              echo " Searching for fully-qualified name..."
              SEARCH_RESULTS=$(grep -r "public.*void.*${TEST_METHOD}\|public.*async.*Task.*${TEST_METHOD}" tests/ --include="*.cs" -l || true)

              if [ -z "$SEARCH_RESULTS" ]; then
                echo " ❌ Could not find test method in repository"
                TESTS_FAILED=$((TESTS_FAILED + 1))
                FAILED_TESTS="${FAILED_TESTS}${TEST_METHOD} (not found)\n"
                continue
              fi

              # Get first file and extract namespace/class
              TEST_FILE=$(echo "$SEARCH_RESULTS" | head -1)
              echo " Found in: $TEST_FILE"

              # Extract namespace and class from file
              NAMESPACE=$(grep -oP '^namespace\s+\K[^;{]+' "$TEST_FILE" | head -1 | xargs)
              CLASS=$(grep -oP '^\s*public\s+class\s+\K\w+' "$TEST_FILE" | head -1)

              if [ -n "$NAMESPACE" ] && [ -n "$CLASS" ]; then
                TEST_METHOD="${NAMESPACE}.${CLASS}.${TEST_METHOD}"
                echo " Fully-qualified name: $TEST_METHOD"
              else
                echo " ⚠️ Could not determine fully-qualified name, trying as-is"
              fi
            fi

            # Run QuarantineTools to unquarantine.
            # stdin is redirected so the tool cannot swallow the remaining loop input.
            echo " Running QuarantineTools..."
            if dotnet run --project tools/QuarantineTools -- -u "$TEST_METHOD" < /dev/null; then
              echo " ✅ Successfully unquarantined"
              TESTS_SUCCESS=$((TESTS_SUCCESS + 1))
            else
              echo " ❌ QuarantineTools failed"
              TESTS_FAILED=$((TESTS_FAILED + 1))
              FAILED_TESTS="${FAILED_TESTS}${TEST_METHOD} (unquarantine failed)\n"
            fi
          done < <(jq -c '.[]' stable_tests.json)

          echo "tests_processed=$TESTS_PROCESSED" >> "$GITHUB_OUTPUT"
          echo "tests_success=$TESTS_SUCCESS" >> "$GITHUB_OUTPUT"
          echo "tests_failed=$TESTS_FAILED" >> "$GITHUB_OUTPUT"

          # Save failed tests for PR description
          echo -e "$FAILED_TESTS" > failed_tests.txt

      # `git diff` only looks at tracked files, so the scratch files created by the
      # earlier steps do not count as changes here.
      - name: Check for changes
        id: check-changes
        if: steps.analyze.outputs.stable_count > 0 && env.DRY_RUN == 'false'
        run: |
          if git diff --quiet; then
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
            echo "No changes were made"
          else
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
            echo "Changes detected:"
            git diff --stat
          fi

      # Builds the first .csproj found under each modified tests/<dir>; fails the
      # job if any of those builds fails.
      - name: Build affected test projects
        id: build
        if: steps.check-changes.outputs.has_changes == 'true'
        run: |
          echo "Building affected test projects..."

          # Get list of modified test projects
          MODIFIED_FILES=$(git diff --name-only)
          echo "$MODIFIED_FILES"

          # Extract unique test project directories
          TEST_PROJECTS=$(echo "$MODIFIED_FILES" | grep '^tests/' | cut -d'/' -f1-2 | sort -u)

          BUILD_SUCCESS=true
          for PROJECT_DIR in $TEST_PROJECTS; do
            CSPROJ=$(find "$PROJECT_DIR" -name "*.csproj" | head -1)
            if [ -n "$CSPROJ" ]; then
              echo "Building $CSPROJ..."
              if ! dotnet build "$CSPROJ"; then
                echo "❌ Build failed for $CSPROJ"
                BUILD_SUCCESS=false
              fi
            fi
          done

          if [ "$BUILD_SUCCESS" = "false" ]; then
            echo "Build failed for one or more test projects"
            exit 1
          fi

          echo "✅ All test projects built successfully"

      # Commits the tracked-file changes on a new branch, pushes it and opens a PR
      # against 'main' that references the issues of all stable tests.
      - name: Create Pull Request
        if: steps.check-changes.outputs.has_changes == 'true'
        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
        env:
          TESTS_SUCCESS: ${{ steps.unquarantine.outputs.tests_success || '0' }}
          TESTS_FAILED: ${{ steps.unquarantine.outputs.tests_failed || '0' }}
        with:
          script: |
            const { spawnSync } = require('child_process');
            const fs = require('fs');
            const path = require('path');

            // Read stable tests data
            const stableTests = JSON.parse(fs.readFileSync('stable_tests.json', 'utf8'));
            const testsSuccess = Number.parseInt(process.env.TESTS_SUCCESS, 10) || 0;
            const testsFailed = Number.parseInt(process.env.TESTS_FAILED, 10) || 0;

            if (stableTests.length === 0) {
              console.log('No tests to unquarantine, skipping PR creation');
              return;
            }

            // Runs `git <args>` and returns its stdout; throws with git's own
            // output when the command exits non-zero.
            function runGit(args) {
              const result = spawnSync('git', args, { encoding: 'utf-8', stdio: 'pipe' });
              if (result.status !== 0) {
                throw new Error(result.stderr || result.stdout || 'Git command failed');
              }
              return result.stdout;
            }

            // Configure git
            runGit(['config', 'user.name', 'github-actions']);
            // NOTE(review): the address in the reviewed copy was mangled by e-mail
            // obfuscation ("[email protected]"); restored to the conventional bot
            // address. Confirm against the original file.
            runGit(['config', 'user.email', 'github-actions@github.com']);

            // Create branch
            const branchName = `automated/unquarantine-stable-tests-${Date.now()}`;
            runGit(['checkout', '-b', branchName]);

            // Stage tracked files only. `git add -A` would also commit the scratch
            // files this workflow leaves in the workspace (quarantined_issues.json,
            // analyze_issues.sh, stable_tests.json, failed_tests.txt).
            runGit(['add', '-u']);

            // GitHub only links the first issue of "Closes #1, #2"; the keyword has
            // to be repeated for every issue.
            // NOTE(review): stable_tests.json also lists tests whose unquarantine
            // attempt failed, so their issues are referenced here too. Confirm that
            // this is intended.
            const closesLine = stableTests.map((t) => `Closes #${t.issue}`).join(', ');
            const testList = stableTests
              .map((t) => `- ${t.method}: 0 failures in ${t.linux_runs}+ runs (linux/macos/windows)`)
              .join('\n');

            // Build commit message (subject, blank line, body)
            const commitMessage = [
              '[automated] Unquarantine stable tests',
              '',
              'Automatically unquarantined the following tests that have been stable across all platforms:',
              '',
              testList,
              '',
              closesLine,
            ].join('\n');

            const msgFile = path.join(process.env.RUNNER_TEMP, 'commit-msg.txt');
            fs.writeFileSync(msgFile, commitMessage);
            try {
              runGit(['commit', '-F', msgFile]);
            } finally {
              fs.unlinkSync(msgFile);
            }

            // Push branch
            runGit(['push', 'origin', branchName]);

            // Files touched by the commit; computed once and reused for every table row.
            const changedFiles = runGit(['diff', '--name-only', 'HEAD~1'])
              .split('\n')
              .filter(Boolean);

            // Returns the first changed file whose path contains the class segment of
            // a dotted method name, or 'N/A' when the name has no class segment or no
            // file matches.
            const findChangedFile = (method) => {
              const segments = method.split('.');
              const className = segments.length >= 2 ? segments[segments.length - 2] : '';
              if (!className) {
                return 'N/A';
              }
              return changedFiles.find((f) => f.includes(className)) ?? 'N/A';
            };

            // Build PR description
            const testTable = stableTests
              .map((t) => `| ${t.method} | ${findChangedFile(t.method)} | #${t.issue} | 0/${t.linux_runs} | 0/${t.macos_runs} | 0/${t.windows_runs} |`)
              .join('\n');

            let prBody = [
              '## Summary',
              '',
              'This PR automatically unquarantines tests that have been stable across all platforms in recent CI runs.',
              '',
              '## Tests Unquarantined',
              '',
              '| Test Method | File | Issue | Linux | macOS | Windows |',
              '|-------------|------|-------|-------|-------|---------|',
              testTable,
              '',
              '## Criteria',
              '',
              'Tests were automatically selected for unquarantining based on:',
              '- ✅ Zero failures across all platforms (linux, macos, windows)',
              `- ✅ Minimum ${process.env.MIN_RUNS_THRESHOLD} runs per platform`,
              '- ✅ Recent quarantine report data available',
              '',
              '## Verification',
              '',
              '✅ Built all affected test projects successfully',
              '✅ Verified attribute removal using QuarantineTools',
              '✅ All tests meet stability criteria',
              '',
              '## Statistics',
              '',
              `- Tests processed: ${testsSuccess + testsFailed}`,
              `- Successfully unquarantined: ${testsSuccess}`,
              `- Failed to unquarantine: ${testsFailed}`,
              '',
              '## Related Issues',
              '',
              closesLine,
              '',
              '---',
              '**Note:** This PR was automatically generated by the auto-unquarantine-tests workflow based on CI run data. If any of these tests start failing again, they can be re-quarantined.',
            ].join('\n');

            // Add failed tests section if any
            if (testsFailed > 0 && fs.existsSync('failed_tests.txt')) {
              const failedTests = fs.readFileSync('failed_tests.txt', 'utf8').trim();
              if (failedTests) {
                prBody += `\n\n## ⚠️ Unable to Unquarantine\n\nThe following tests could not be unquarantined:\n\n${failedTests}`;
              }
            }

            // Create PR
            const { data: pr } = await github.rest.pulls.create({
              owner: context.repo.owner,
              repo: context.repo.repo,
              title: '[automated] Unquarantine stable tests (weekly auto-cleanup)',
              body: prBody,
              head: branchName,
              base: 'main',
            });

            console.log(`Created PR #${pr.number}: ${pr.html_url}`);

            // Add labels (best effort: a labelling failure must not fail the job
            // after the PR already exists)
            try {
              await github.rest.issues.addLabels({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: pr.number,
                labels: ['area-testing', 'automated'],
              });
            } catch (labelError) {
              console.log(`Note: Could not add labels: ${labelError.message}`);
            }

      - name: Post dry-run summary
        if: env.DRY_RUN == 'true' && steps.analyze.outputs.stable_count > 0
        run: |
          echo "🔍 DRY RUN MODE - No PR created"
          echo ""
          echo "Would unquarantine the following tests:"
          jq -r '.[] | " - \(.method) (issue #\(.issue))"' stable_tests.json

      # Runs even when an earlier step failed, so the log always ends with a summary.
      - name: Post completion summary
        if: always()
        run: |
          echo "=========================================="
          echo "Auto-Unquarantine Workflow Summary"
          echo "=========================================="
          echo "Issues scanned: ${{ steps.fetch-issues.outputs.issue_count }}"
          echo "Stable tests found: ${{ steps.analyze.outputs.stable_count }}"
          if [ "$DRY_RUN" = "true" ]; then
            echo "Mode: DRY RUN (no changes made)"
          else
            echo "Tests processed: ${{ steps.unquarantine.outputs.tests_processed || 0 }}"
            echo "Successfully unquarantined: ${{ steps.unquarantine.outputs.tests_success || 0 }}"
            echo "Failed: ${{ steps.unquarantine.outputs.tests_failed || 0 }}"
          fi
          echo "=========================================="