open-telemetry · vitorvasc · Dec 12, 2025 · Dec 12, 2025 · Jan 27, 2026 · Jan 27, 2026
@@ -36,10 +36,97 @@ on:
         default: '20000'
         type: string
   pull_request_target:
+    types: [opened, synchronize, reopened]
+
+# Serialise runs per pull request: the group key is the PR number from the
+# event payload, falling back to the manually supplied `pr_number` input and
+# finally to the run id (which makes the group unique to this run).
+concurrency:
+  group:
+    ai-detection-${{ github.event.pull_request.number ||
+    github.event.inputs.pr_number || github.run_id }}
+  # A newer run in the same group cancels the one still in progress.
+  cancel-in-progress: true
 
 jobs:
+  # Normalises and validates the workflow inputs before any analysis runs.
+  # Every value that originates from the event payload is handed to the shell
+  # through `env:` instead of being interpolated into the script text, so a
+  # crafted input cannot inject shell commands.
+  validate-inputs:
+    name: 'Validate Inputs'
+    runs-on: ubuntu-latest
+    outputs:
+      pr-number: ${{ steps.extract.outputs.pr-number }}
+      confidence-threshold: ${{ steps.extract.outputs.confidence-threshold }}
+      should-analyze: ${{ steps.validate.outputs.should-analyze }}
+      dry-run: ${{ steps.extract.outputs.dry-run }}
+      fail-on-detection: ${{ steps.extract.outputs.fail-on-detection }}
+    steps:
+      - name: Extract and set defaults
+        id: extract
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          PR_NUMBER:
+            ${{ github.event.inputs.pr_number ||
+            github.event.pull_request.number }}
+          CONFIDENCE: ${{ github.event.inputs.confidence_threshold || '80' }}
+          INPUT_DRY_RUN: ${{ github.event.inputs.dry_run }}
+          INPUT_FAIL_ON_DETECTION: ${{ github.event.inputs.fail_on_detection }}
+        run: |
+          # Extract PR number (manual input wins over the event payload)
+          echo "pr-number=${PR_NUMBER}" >> "$GITHUB_OUTPUT"
+
+          # Extract confidence threshold (defaults to 80)
+          echo "confidence-threshold=${CONFIDENCE}" >> "$GITHUB_OUTPUT"
+
+          # Set DRY_RUN: always true for pull_request_target, respect input for workflow_dispatch
+          if [[ "$EVENT_NAME" == "pull_request_target" ]]; then
+            echo "dry-run=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "dry-run=${INPUT_DRY_RUN}" >> "$GITHUB_OUTPUT"
+          fi
+
+          # Set FAIL_ON_DETECTION: only true for workflow_dispatch with explicit input
+          if [[ "$EVENT_NAME" == "workflow_dispatch" && "$INPUT_FAIL_ON_DETECTION" == "true" ]]; then
+            echo "fail-on-detection=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "fail-on-detection=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Validate inputs
+        id: validate
+        env:
+          PR_NUMBER: ${{ steps.extract.outputs.pr-number }}
+          CONFIDENCE: ${{ steps.extract.outputs.confidence-threshold }}
+          DIFF_MAX: ${{ github.event.inputs.diff_max_chars || '20000' }}
+        run: |
+          # Report an error annotation, mark the run as not analysable, and stop.
+          fail() {
+            echo "::error::$1"
+            echo "should-analyze=false" >> "$GITHUB_OUTPUT"
+            exit 1
+          }
+
+          # Patterns are used instead of `-lt`/`-gt` because bash evaluates
+          # those operands as arithmetic expressions: non-numeric input would
+          # be executed as an expression and values like "08" fail as octal.
+          numeric_re='^[0-9]+$'
+          percent_re='^0*(100|[0-9]{1,2})$'   # 0..100, leading zeros allowed
+          min_1000_re='^0*[1-9][0-9]{3,}$'    # any integer >= 1000
+
+          # Validate PR number
+          if [[ -z "$PR_NUMBER" ]]; then
+            fail "PR number is required"
+          fi
+
+          if ! [[ "$PR_NUMBER" =~ $numeric_re ]]; then
+            fail "Invalid PR number: $PR_NUMBER (must be numeric)"
+          fi
+
+          # Validate confidence threshold
+          if ! [[ "$CONFIDENCE" =~ $numeric_re ]]; then
+            fail "Invalid confidence threshold: $CONFIDENCE (must be numeric)"
+          fi
+
+          if ! [[ "$CONFIDENCE" =~ $percent_re ]]; then
+            fail "Confidence threshold must be between 0 and 100, got: $CONFIDENCE"
+          fi
+
+          # Validate diff_max_chars
+          if ! [[ "$DIFF_MAX" =~ $numeric_re ]]; then
+            fail "Invalid diff_max_chars: $DIFF_MAX (must be numeric)"
+          fi
+
+          if ! [[ "$DIFF_MAX" =~ $min_1000_re ]]; then
+            fail "diff_max_chars must be >= 1000, got: $DIFF_MAX"
+          fi
+
+          echo "should-analyze=true" >> "$GITHUB_OUTPUT"
+          echo "✓ All inputs validated successfully"
+
   analyze:
     name: 'Analyze PR'
+    needs: validate-inputs
+    if: needs.validate-inputs.outputs.should-analyze == 'true'
     runs-on: ubuntu-latest
     permissions:
       contents: read
@@ -59,33 +146,56 @@ jobs:
         uses: actions/setup-node@v6
         with:
           node-version: '22'
+          cache: 'npm'
+          cache-dependency-path: 'scripts/ai-content-detection/package-lock.json'
 
       - name: Install GitHub Copilot CLI
         run: npm install -g @github/copilot
 
       - name: Install script dependencies
         working-directory: ./scripts/ai-content-detection
-        run: npm install
+        run: npm ci
 
       - name: Run AI detection analysis
         working-directory: ./scripts/ai-content-detection
         env:
           GH_TOKEN: ${{ steps.otelbot-token.outputs.token }}
-          PR_NUMBER:
-            ${{ github.event.inputs.pr_number ||
-            github.event.pull_request.number }}
+          COPILOT_TOKEN: ${{ secrets.COPILOT_TOKEN }}
           GITHUB_REPOSITORY: ${{ github.repository }}
+          PR_NUMBER: ${{ needs.validate-inputs.outputs.pr-number }}
           CONFIDENCE_THRESHOLD:
-            ${{ github.event.inputs.confidence_threshold || '80' }}
+            ${{ needs.validate-inputs.outputs.confidence-threshold }}
           PR_LABEL: ${{ github.event.inputs.pr_label || 'ai-generated' }}
           SKIP_USERS: ${{ github.event.inputs.skip_users || '' }}
           FAIL_ON_DETECTION:
-            ${{ (github.event_name == 'workflow_dispatch' &&
-            github.event.inputs.fail_on_detection == true) && 'true' || 'false'
-            }}
-          DRY_RUN:
-            ${{ (github.event_name == 'workflow_dispatch' &&
-            github.event.inputs.dry_run == true) && 'true' || 'true' }}
+            ${{ needs.validate-inputs.outputs.fail-on-detection }}
+          DRY_RUN: ${{ needs.validate-inputs.outputs.dry-run }}
           CUSTOM_PROMPT: ${{ github.event.inputs.custom_prompt || '' }}
           DIFF_MAX_CHARS: ${{ github.event.inputs.diff_max_chars || '20000' }}
         run: node analyze.js
+
+  # Publishes a job summary and mirrors the outcome of the analysis job.
+  # `if: always()` makes this job run even when an earlier job failed or was
+  # skipped.
+  report-results:
+    name: 'Report Results'
+    needs: [validate-inputs, analyze]
+    if: always()
+    runs-on: ubuntu-latest
+    steps:
+      - name: Add job summary
+        # Values are passed through `env:` rather than interpolated into the
+        # script: this job also runs after a failed validation, so pr-number
+        # may still carry unvalidated input (NOTE(review): confirm outputs of
+        # a failed job are propagated in your setup).
+        env:
+          PR_NUMBER: ${{ needs.validate-inputs.outputs.pr-number }}
+          ANALYSIS_STATUS: ${{ needs.analyze.result }}
+          DRY_RUN: ${{ needs.validate-inputs.outputs.dry-run }}
+          CONFIDENCE_THRESHOLD:
+            ${{ needs.validate-inputs.outputs.confidence-threshold }}
+          FAIL_ON_DETECTION:
+            ${{ needs.validate-inputs.outputs.fail-on-detection }}
+        run: |
+          # Unquoted heredoc so the shell expands the variables; expanded
+          # values are inserted verbatim and are not re-evaluated.
+          cat >> "$GITHUB_STEP_SUMMARY" <<EOF
+          ## AI Content Detection Results
+
+          | Item | Value |
+          |------|-------|
+          | PR Number | #${PR_NUMBER} |
+          | Analysis Status | ${ANALYSIS_STATUS} |
+          | Dry Run Mode | ${DRY_RUN} |
+          | Confidence Threshold | ${CONFIDENCE_THRESHOLD}% |
+          | Fail on Detection | ${FAIL_ON_DETECTION} |
+          EOF
+
+      # Fail this job when the analysis job failed, so the overall result is
+      # visible from the reporting job as well.
+      - name: Check analysis result
+        if: needs.analyze.result == 'failure'
+        run: |
+          echo "::error::AI content analysis failed or detected AI-generated content"
+          exit 1
@@ -21,6 +21,7 @@ Analyzes PR diffs using GitHub Copilot CLI to detect AI-generated content.
 
    ```bash
    export GH_TOKEN="your-github-token"
+   export COPILOT_TOKEN="your-pat-with-copilot-access"  # Optional; Copilot analysis is skipped without it
    export PR_NUMBER="123"
    export GITHUB_REPOSITORY="open-telemetry/opentelemetry.io"
 
@@ -41,18 +42,22 @@ Analyzes PR diffs using GitHub Copilot CLI to detect AI-generated content.
 
 ## Configuration
 
-| Variable               | Required | Default        | Description                   |
-| ---------------------- | -------- | -------------- | ----------------------------- |
-| `GH_TOKEN`             | Yes      | -              | GitHub token with repo access |
-| `PR_NUMBER`            | Yes      | -              | Pull request number           |
-| `GITHUB_REPOSITORY`    | Yes      | -              | Repository (owner/repo)       |
-| `CONFIDENCE_THRESHOLD` | No       | `80`           | Detection threshold (0-100)   |
-| `PR_LABEL`             | No       | `ai-generated` | Label for detected PRs        |
-| `SKIP_USERS`           | No       | `""`           | Comma-separated users to skip |
-| `FAIL_ON_DETECTION`    | No       | `false`        | Fail if AI detected           |
-| `DRY_RUN`              | No       | `true`         | Test mode (no PR updates)     |
-| `CUSTOM_PROMPT`        | No       | `""`           | Custom analysis prompt        |
-| `DIFF_MAX_CHARS`       | No       | `20000`        | Max diff characters           |
+| Variable               | Required | Default        | Description                    |
+| ---------------------- | -------- | -------------- | ------------------------------ |
+| `GH_TOKEN`             | Yes      | -              | GitHub token with repo access  |
+| `COPILOT_TOKEN`        | No\*     | -              | GitHub PAT with Copilot access |
+| `PR_NUMBER`            | Yes      | -              | Pull request number            |
+| `GITHUB_REPOSITORY`    | Yes      | -              | Repository (owner/repo)        |
+| `CONFIDENCE_THRESHOLD` | No       | `80`           | Detection threshold (0-100)    |
+| `PR_LABEL`             | No       | `ai-generated` | Label for detected PRs         |
+| `SKIP_USERS`           | No       | `""`           | Comma-separated users to skip  |
+| `FAIL_ON_DETECTION`    | No       | `false`        | Fail if AI detected            |
+| `DRY_RUN`              | No       | `true`         | Test mode (no PR updates)      |
+| `CUSTOM_PROMPT`        | No       | `""`           | Custom analysis prompt         |
+| `DIFF_MAX_CHARS`       | No       | `20000`        | Max diff characters            |
+
+\*Required for AI detection to run. Without it, the script will skip Copilot
+analysis.
 
 ## Quick Test Example
 
@@ -74,3 +79,24 @@ node analyze.js
 3. Parses confidence score from response
 4. If score ≥ threshold: posts comment, adds label, optionally fails
 5. Dry run mode: analyzes but skips all PR modifications
+
+## Token Requirements
+
+This script uses two GitHub tokens with different permission scopes. `GH_TOKEN`
+is always required; `COPILOT_TOKEN` is only needed for the Copilot analysis:
+
+### GH_TOKEN (GitHub App Installation Token)
+
+- **Used for**: GitHub API operations (fetching PR details, posting comments,
+  adding labels)
+- **Permissions**: `contents:read`, `pull-requests:write`, `issues:write`
+- **Source in CI**: Auto-generated by otelbot GitHub App
+- **Local testing**: Use any GitHub PAT with repo access or `$(gh auth token)`
+
+### COPILOT_TOKEN (Personal Access Token) - Optional, Needed Only for AI Detection
+
+- **Used for**: GitHub Copilot CLI operations only
+- **Permissions**: User-level "Copilot Requests: Read and write"
+- **Format**: Fine-grained PAT starting with `github_pat_`
+- **Why separate**: GitHub App tokens cannot access user-level Copilot features
+- **Behavior when missing**: Script skips Copilot analysis and exits
+  successfully
@@ -14,6 +14,7 @@ class AIDetectionConfig {
   constructor() {
     // Required configuration
     this.ghToken = process.env.GH_TOKEN;
+    this.copilotToken = process.env.COPILOT_TOKEN || null;
     this.prNumber = parseInt(process.env.PR_NUMBER, 10);
     this.repo = process.env.GITHUB_REPOSITORY;
 
@@ -41,6 +42,15 @@ class AIDetectionConfig {
       throw new Error('GH_TOKEN is required');
     }
 
+    if (!this.copilotToken) {
+      console.warn(
+        'WARNING: COPILOT_TOKEN not set. Copilot analysis will be skipped.',
+      );
+      console.warn(
+        'To enable AI detection, configure COPILOT_TOKEN with a PAT that has Copilot access.',
+      );
+    }
+
     if (!this.prNumber || isNaN(this.prNumber)) {
       throw new Error('PR_NUMBER is required and must be a number');
     }
@@ -149,11 +159,20 @@ function escapeShellArg(arg) {
 
 /**
  * Runs Copilot CLI analysis on the diff
+ *
+ * Note: Uses config.copilotToken instead of config.ghToken because
+ * Copilot CLI requires user-level "Copilot Requests" permission.
+ *
  * @param {string} diff - Git diff content
  * @param {AIDetectionConfig} config - Configuration object
- * @returns {string} Copilot analysis output
+ * @returns {string|null} Copilot analysis output, or null if COPILOT_TOKEN unavailable
  */
 function runCopilotAnalysis(diff, config) {
+  if (!config.copilotToken) {
+    console.log('Skipping Copilot analysis - COPILOT_TOKEN not configured');
+    return null;
+  }
+
   console.log('Running Copilot analysis...');
 
   // Build prompt
@@ -167,7 +186,7 @@ function runCopilotAnalysis(diff, config) {
     const output = execSync(command, {
       encoding: 'utf-8',
       maxBuffer: 10 * 1024 * 1024, // 10MB buffer
-      env: { ...process.env, GH_TOKEN: config.ghToken },
+      env: { ...process.env, GH_TOKEN: config.copilotToken },
     });
 
     console.log('--- Copilot Analysis Output ---');
@@ -321,6 +340,12 @@ async function main() {
     // 5. Run Copilot analysis
     const analysis = runCopilotAnalysis(diff, cfg);
 
+    // Handle case where Copilot analysis was skipped
+    if (!analysis) {
+      console.log('No Copilot analysis performed. Exiting without detection.');
+      process.exit(0);
+    }
+
     // 6. Parse confidence score
     const score = parseConfidenceScore(analysis);