diff --git a/.github/workflows/ai-content-detection.yml b/.github/workflows/ai-content-detection.yml index 13688fa1b4e7..b16d7a2a0cc7 100644 --- a/.github/workflows/ai-content-detection.yml +++ b/.github/workflows/ai-content-detection.yml @@ -36,10 +36,105 @@ on: default: '20000' type: string pull_request_target: + types: [opened, synchronize, reopened] + +concurrency: + group: ai-detection-${{ github.event.pull_request.number || github.event.inputs.pr_number || github.run_id }} + cancel-in-progress: true jobs: + validate-inputs: + name: 'Validate Inputs' + runs-on: ubuntu-latest + outputs: + pr-number: ${{ steps.extract.outputs.pr-number }} + confidence-threshold: ${{ steps.extract.outputs.confidence-threshold }} + should-analyze: ${{ steps.validate.outputs.should-analyze }} + dry-run: ${{ steps.extract.outputs.dry-run }} + fail-on-detection: ${{ steps.extract.outputs.fail-on-detection }} + diff-max-chars: ${{ steps.extract.outputs.diff-max-chars }} + steps: + - name: Extract and set defaults + id: extract + run: | + # Extract PR number + PR_NUMBER="${{ github.event.inputs.pr_number || github.event.pull_request.number }}" + echo "pr-number=${PR_NUMBER}" >> $GITHUB_OUTPUT + + # Extract confidence threshold + CONFIDENCE="${{ github.event.inputs.confidence_threshold || '80' }}" + echo "confidence-threshold=${CONFIDENCE}" >> $GITHUB_OUTPUT + + # Set DRY_RUN: always true for pull_request_target, respect input for workflow_dispatch + if [[ "${{ github.event_name }}" == "pull_request_target" ]]; then + echo "dry-run=true" >> $GITHUB_OUTPUT + else + echo "dry-run=${{ github.event.inputs.dry_run }}" >> $GITHUB_OUTPUT + fi + + # Set FAIL_ON_DETECTION: only true for workflow_dispatch with explicit input + if [[ "${{ github.event_name }}" == "workflow_dispatch" && "${{ github.event.inputs.fail_on_detection }}" == "true" ]]; then + echo "fail-on-detection=true" >> $GITHUB_OUTPUT + else + echo "fail-on-detection=false" >> $GITHUB_OUTPUT + fi + + # Extract diff_max_chars + 
DIFF_MAX="${{ github.event.inputs.diff_max_chars || '20000' }}" + echo "diff-max-chars=${DIFF_MAX}" >> $GITHUB_OUTPUT + + - name: Validate inputs + id: validate + run: | + PR_NUMBER="${{ steps.extract.outputs.pr-number }}" + CONFIDENCE="${{ steps.extract.outputs.confidence-threshold }}" + + # Validate PR number + if [[ -z "$PR_NUMBER" ]]; then + echo "::error::PR number is required" + echo "should-analyze=false" >> $GITHUB_OUTPUT + exit 1 + fi + + if ! [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then + echo "::error::Invalid PR number: $PR_NUMBER (must be numeric)" + echo "should-analyze=false" >> $GITHUB_OUTPUT + exit 1 + fi + + # Validate confidence threshold + if ! [[ "$CONFIDENCE" =~ ^[0-9]+$ ]]; then + echo "::error::Invalid confidence threshold: $CONFIDENCE (must be numeric)" + echo "should-analyze=false" >> $GITHUB_OUTPUT + exit 1 + fi + + if [[ "$CONFIDENCE" -lt 0 || "$CONFIDENCE" -gt 100 ]]; then + echo "::error::Confidence threshold must be between 0 and 100, got: $CONFIDENCE" + echo "should-analyze=false" >> $GITHUB_OUTPUT + exit 1 + fi + + # Validate diff_max_chars + DIFF_MAX="${{ steps.extract.outputs.diff-max-chars }}" + if ! 
[[ "$DIFF_MAX" =~ ^[0-9]+$ ]]; then + echo "::error::Invalid diff_max_chars: $DIFF_MAX (must be numeric)" + echo "should-analyze=false" >> $GITHUB_OUTPUT + exit 1 + fi + if [[ "$DIFF_MAX" -lt 1000 ]]; then + echo "::error::diff_max_chars must be >= 1000, got: $DIFF_MAX" + echo "should-analyze=false" >> $GITHUB_OUTPUT + exit 1 + fi + + echo "should-analyze=true" >> $GITHUB_OUTPUT + echo "✓ All inputs validated successfully" + analyze: name: 'Analyze PR' + needs: validate-inputs + if: needs.validate-inputs.outputs.should-analyze == 'true' runs-on: ubuntu-latest permissions: contents: read @@ -58,7 +153,9 @@ jobs: - name: Setup Node.js uses: actions/setup-node@v6 with: - node-version: '22' + node-version-file: .nvmrc + cache: 'npm' + cache-dependency-path: scripts/ai-content-detection/package.json - name: Install GitHub Copilot CLI run: npm install -g @github/copilot @@ -71,21 +168,42 @@ jobs: working-directory: ./scripts/ai-content-detection env: GH_TOKEN: ${{ steps.otelbot-token.outputs.token }} - PR_NUMBER: - ${{ github.event.inputs.pr_number || - github.event.pull_request.number }} + COPILOT_TOKEN: ${{ secrets.COPILOT_TOKEN }} GITHUB_REPOSITORY: ${{ github.repository }} + PR_NUMBER: ${{ needs.validate-inputs.outputs.pr-number }} CONFIDENCE_THRESHOLD: - ${{ github.event.inputs.confidence_threshold || '80' }} + ${{ needs.validate-inputs.outputs.confidence-threshold }} PR_LABEL: ${{ github.event.inputs.pr_label || 'ai-generated' }} SKIP_USERS: ${{ github.event.inputs.skip_users || '' }} FAIL_ON_DETECTION: - ${{ (github.event_name == 'workflow_dispatch' && - github.event.inputs.fail_on_detection == true) && 'true' || 'false' - }} - DRY_RUN: - ${{ (github.event_name == 'workflow_dispatch' && - github.event.inputs.dry_run == true) && 'true' || 'true' }} + ${{ needs.validate-inputs.outputs.fail-on-detection }} + DRY_RUN: ${{ needs.validate-inputs.outputs.dry-run }} CUSTOM_PROMPT: ${{ github.event.inputs.custom_prompt || '' }} - DIFF_MAX_CHARS: ${{ 
github.event.inputs.diff_max_chars || '20000' }} + DIFF_MAX_CHARS: ${{ needs.validate-inputs.outputs.diff-max-chars }} run: node analyze.js + + report-results: + name: 'Report Results' + needs: [validate-inputs, analyze] + if: always() && needs.validate-inputs.result == 'success' + runs-on: ubuntu-latest + steps: + - name: Add job summary + run: | + cat >> $GITHUB_STEP_SUMMARY <<'EOF' + ## AI Content Detection Results + + | Item | Value | + |------|-------| + | PR Number | #${{ needs.validate-inputs.outputs.pr-number }} | + | Analysis Status | ${{ needs.analyze.result }} | + | Dry Run Mode | ${{ needs.validate-inputs.outputs.dry-run }} | + | Confidence Threshold | ${{ needs.validate-inputs.outputs.confidence-threshold }}% | + | Fail on Detection | ${{ needs.validate-inputs.outputs.fail-on-detection }} | + EOF + + - name: Check analysis result + if: needs.analyze.result == 'failure' + run: | + echo "::error::AI content analysis failed or detected AI-generated content" + exit 1 diff --git a/scripts/ai-content-detection/README.md b/scripts/ai-content-detection/README.md index 7fc271cc2d1b..285b7c7e940f 100644 --- a/scripts/ai-content-detection/README.md +++ b/scripts/ai-content-detection/README.md @@ -21,6 +21,7 @@ Analyzes PR diffs using GitHub Copilot CLI to detect AI-generated content. ```bash export GH_TOKEN="your-github-token" + export COPILOT_TOKEN="your-pat-with-copilot-access" # Optional but required for AI detection export PR_NUMBER="123" export GITHUB_REPOSITORY="open-telemetry/opentelemetry.io" @@ -41,18 +42,24 @@ Analyzes PR diffs using GitHub Copilot CLI to detect AI-generated content. 
## Configuration -| Variable | Required | Default | Description | -| ---------------------- | -------- | -------------- | ----------------------------- | -| `GH_TOKEN` | Yes | - | GitHub token with repo access | -| `PR_NUMBER` | Yes | - | Pull request number | -| `GITHUB_REPOSITORY` | Yes | - | Repository (owner/repo) | -| `CONFIDENCE_THRESHOLD` | No | `80` | Detection threshold (0-100) | -| `PR_LABEL` | No | `ai-generated` | Label for detected PRs | -| `SKIP_USERS` | No | `""` | Comma-separated users to skip | -| `FAIL_ON_DETECTION` | No | `false` | Fail if AI detected | -| `DRY_RUN` | No | `true` | Test mode (no PR updates) | -| `CUSTOM_PROMPT` | No | `""` | Custom analysis prompt | -| `DIFF_MAX_CHARS` | No | `20000` | Max diff characters | +| Variable | Required | Default | Description | +| ---------------------- | -------- | -------------- | ------------------------------ | +| `GH_TOKEN` | Yes | - | GitHub token with repo access | +| `COPILOT_TOKEN` | No[^1] | - | GitHub PAT with Copilot access | +| `PR_NUMBER` | Yes | - | Pull request number | +| `GITHUB_REPOSITORY` | Yes | - | Repository (owner/repo) | +| `CONFIDENCE_THRESHOLD` | No | `80` | Detection threshold (0-100) | +| `PR_LABEL` | No | `ai-generated` | Label for detected PRs | +| `SKIP_USERS` | No | `""` | Comma-separated users to skip | +| `FAIL_ON_DETECTION` | No | `false` | Fail if AI detected | +| `DRY_RUN` | No | `true` | Test mode (no PR updates) | +| `CUSTOM_PROMPT` | No | `""` | Custom analysis prompt | +| `DIFF_MAX_CHARS` | No | `20000` | Max diff characters | + +[^1]: Required for AI detection to run. Without it, the script will skip +    Copilot analysis. ## Quick Test Example @@ -74,3 +81,24 @@ node analyze.js 3. Parses confidence score from response 4. If score ≥ threshold: posts comment, adds label, optionally fails 5. 
Dry run mode: analyzes but skips all PR modifications + +## Token Requirements + +This script requires two GitHub tokens with different permission scopes: + +### GH_TOKEN (GitHub App Installation Token) + +- **Used for**: GitHub API operations (fetching PR details, posting comments, + adding labels) +- **Permissions**: `contents:read`, `pull-requests:write`, `issues:write` +- **Source in CI**: Auto-generated by otelbot GitHub App +- **Local testing**: Use any GitHub PAT with repo access or `$(gh auth token)` + +### COPILOT_TOKEN (Personal Access Token) - Optional but Required for AI Detection + +- **Used for**: GitHub Copilot CLI operations only +- **Permissions**: User-level "Copilot Requests: Read and write" +- **Format**: Fine-grained PAT starting with `github_pat_` +- **Why separate**: GitHub App tokens cannot access user-level Copilot features +- **Behavior when missing**: Script skips Copilot analysis and exits + successfully diff --git a/scripts/ai-content-detection/analyze.js b/scripts/ai-content-detection/analyze.js index 91f232637457..8bc76d01a3d6 100644 --- a/scripts/ai-content-detection/analyze.js +++ b/scripts/ai-content-detection/analyze.js @@ -14,6 +14,7 @@ class AIDetectionConfig { constructor() { // Required configuration this.ghToken = process.env.GH_TOKEN; + this.copilotToken = process.env.COPILOT_TOKEN || null; this.prNumber = parseInt(process.env.PR_NUMBER, 10); this.repo = process.env.GITHUB_REPOSITORY; @@ -41,6 +42,15 @@ class AIDetectionConfig { throw new Error('GH_TOKEN is required'); } + if (!this.copilotToken) { + console.warn( + 'WARNING: COPILOT_TOKEN not set. 
Copilot analysis will be skipped.', + ); + console.warn( + 'To enable AI detection, configure COPILOT_TOKEN with a PAT that has Copilot access.', + ); + } + if (!this.prNumber || isNaN(this.prNumber)) { throw new Error('PR_NUMBER is required and must be a number'); } @@ -149,11 +159,20 @@ function escapeShellArg(arg) { /** * Runs Copilot CLI analysis on the diff + * + * Note: Uses config.copilotToken instead of config.ghToken because + * Copilot CLI requires user-level "Copilot Requests" permission. + * * @param {string} diff - Git diff content * @param {AIDetectionConfig} config - Configuration object - * @returns {string} Copilot analysis output + * @returns {string|null} Copilot analysis output, or null if COPILOT_TOKEN unavailable */ function runCopilotAnalysis(diff, config) { + if (!config.copilotToken) { + console.log('Skipping Copilot analysis - COPILOT_TOKEN not configured'); + return null; + } + console.log('Running Copilot analysis...'); // Build prompt @@ -167,7 +186,7 @@ function runCopilotAnalysis(diff, config) { const output = execSync(command, { encoding: 'utf-8', maxBuffer: 10 * 1024 * 1024, // 10MB buffer - env: { ...process.env, GH_TOKEN: config.ghToken }, + env: { ...process.env, GH_TOKEN: config.copilotToken }, }); console.log('--- Copilot Analysis Output ---'); @@ -321,6 +340,12 @@ async function main() { // 5. Run Copilot analysis const analysis = runCopilotAnalysis(diff, cfg); + // Handle case where Copilot analysis was skipped + if (!analysis) { + console.log('No Copilot analysis performed. Exiting without detection.'); + process.exit(0); + } + // 6. Parse confidence score const score = parseConfidenceScore(analysis);