Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,16 @@ runs:
echo "ClaudeCode results preview:"
head -n 300 claudecode/claudecode-results.json || echo "Unable to preview results"

# Check if the scan was incomplete (e.g., diff too large)
if jq -e '.scan_status == "incomplete"' claudecode/claudecode-results.json > /dev/null 2>&1; then
ERROR_MSG=$(jq -r '.error' claudecode/claudecode-results.json)
SKIP_REASON=$(jq -r '.analysis_summary.skip_reason // "unknown"' claudecode/claudecode-results.json)
echo "::error::Security scan incomplete: $ERROR_MSG"
echo "::error::The PR diff exceeded GitHub's 20,000-line API limit and local git diff fallback also failed. Security review was NOT performed."
echo "findings_count=0" >> $GITHUB_OUTPUT
echo "scan_status=incomplete" >> $GITHUB_OUTPUT
# Check if the result is an error
if jq -e '.error' claudecode/claudecode-results.json > /dev/null 2>&1; then
elif jq -e '.error' claudecode/claudecode-results.json > /dev/null 2>&1; then
ERROR_MSG=$(jq -r '.error' claudecode/claudecode-results.json)
echo "::warning::ClaudeCode error: $ERROR_MSG"
echo "findings_count=0" >> $GITHUB_OUTPUT
Expand Down
110 changes: 104 additions & 6 deletions claudecode/github_action_audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ class AuditError(ValueError):
"""Raised when security audit operations fail."""
pass

class DiffTooLargeError(Exception):
"""Raised when the PR diff exceeds GitHub's API size limit."""
pass

class GitHubActionClient:
"""Simplified GitHub API client for GitHub Actions environment."""

Expand Down Expand Up @@ -116,24 +120,95 @@ def get_pr_data(self, repo_name: str, pr_number: int) -> Dict[str, Any]:
'changed_files': pr_data['changed_files']
}

def get_pr_diff(self, repo_name: str, pr_number: int) -> str:
def get_pr_diff(self, repo_name: str, pr_number: int, base_ref: str = None) -> str:
"""Get complete PR diff in unified format.


First attempts to fetch the diff via the GitHub API. If the API returns
HTTP 406 (diff too large, exceeds GitHub's 20,000-line limit), falls back
to running ``git diff`` locally against the base branch.

Args:
repo_name: Repository name in format "owner/repo"
pr_number: Pull request number

base_ref: Base branch ref for local git diff fallback (e.g. "main")

Returns:
Complete PR diff in unified format

Raises:
DiffTooLargeError: If both the API and local git diff fail
"""
url = f"https://api.github.com/repos/{repo_name}/pulls/{pr_number}"
headers = dict(self.headers)
headers['Accept'] = 'application/vnd.github.diff'

response = requests.get(url, headers=headers)

if response.status_code == 406:
# GitHub API returns 406 when the diff exceeds 20,000 lines
print(f"[Warning] GitHub API returned 406: PR diff too large for API "
f"(exceeds 20,000-line limit). Falling back to local git diff.",
file=sys.stderr)
return self._get_local_diff(base_ref)

response.raise_for_status()

return self._filter_generated_files(response.text)

def _get_local_diff(self, base_ref: str = None) -> str:
"""Fall back to local git diff when the GitHub API cannot return the diff.

The GitHub Action checks out the repository with fetch-depth: 2 at minimum,
so we can compute the diff locally.

Args:
base_ref: Base branch ref to diff against. If not provided, diffs
against HEAD~1.

Returns:
Filtered diff text from local git

Raises:
DiffTooLargeError: If the local git diff also fails
"""
repo_path = os.environ.get('REPO_PATH', os.getcwd())

# Try diffing against the base branch first, then fall back to HEAD~1
diff_targets = []
if base_ref:
diff_targets.append(f"origin/{base_ref}...HEAD")
diff_targets.append("HEAD~1")

last_error = None
for target in diff_targets:
try:
cmd = ['git', 'diff', target]
result = subprocess.run(
cmd,
cwd=repo_path,
capture_output=True,
text=True,
timeout=120
)
if result.returncode == 0 and result.stdout.strip():
print(f"[Info] Successfully obtained diff using: git diff {target}",
file=sys.stderr)
return self._filter_generated_files(result.stdout)
elif result.returncode != 0:
last_error = result.stderr
print(f"[Warning] git diff {target} failed: {result.stderr.strip()}",
file=sys.stderr)
except subprocess.TimeoutExpired:
last_error = "git diff timed out"
print(f"[Warning] git diff {target} timed out", file=sys.stderr)
except Exception as e:
last_error = str(e)
print(f"[Warning] git diff {target} error: {e}", file=sys.stderr)

raise DiffTooLargeError(
f"PR diff exceeds GitHub API limit (20,000 lines) and local git diff "
f"also failed: {last_error}"
)

def _is_excluded(self, filepath: str) -> bool:
"""Check if a file should be excluded based on directory patterns."""
Expand Down Expand Up @@ -573,10 +648,33 @@ def main():
# Get PR data
try:
pr_data = github_client.get_pr_data(repo_name, pr_number)
pr_diff = github_client.get_pr_diff(repo_name, pr_number)
except Exception as e:
print(json.dumps({'error': f'Failed to fetch PR data: {str(e)}'}))
sys.exit(EXIT_GENERAL_ERROR)

# Get PR diff (with fallback to local git diff for large PRs)
base_ref = pr_data.get('base', {}).get('ref')
try:
pr_diff = github_client.get_pr_diff(repo_name, pr_number, base_ref=base_ref)
except DiffTooLargeError as e:
# Both GitHub API and local git diff failed - surface this clearly
error_output = {
'error': f'Diff too large: {str(e)}',
'scan_status': 'incomplete',
'pr_number': pr_number,
'repo': repo_name,
'findings': [],
'analysis_summary': {
'files_reviewed': 0,
'review_completed': False,
'skip_reason': 'diff_too_large'
}
}
print(json.dumps(error_output, indent=2))
sys.exit(EXIT_GENERAL_ERROR)
except Exception as e:
print(json.dumps({'error': f'Failed to fetch PR diff: {str(e)}'}))
sys.exit(EXIT_GENERAL_ERROR)

# Generate security audit prompt
prompt = get_security_audit_prompt(pr_data, pr_diff, custom_scan_instructions=custom_scan_instructions)
Expand Down
178 changes: 176 additions & 2 deletions claudecode/test_github_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@

import pytest
import os
from unittest.mock import Mock, patch
import subprocess
from unittest.mock import Mock, patch, MagicMock

from claudecode.github_action_audit import GitHubActionClient
from claudecode.github_action_audit import GitHubActionClient, DiffTooLargeError


class TestGitHubActionClient:
Expand Down Expand Up @@ -337,3 +338,176 @@ def test_pagination_not_needed_for_pr_files(self, mock_get):
assert len(result['files']) == 100
assert result['files'][0]['filename'] == 'file0.py'
assert result['files'][99]['filename'] == 'file99.py'


class TestDiffTooLarge:
"""Test handling of large PR diffs that exceed GitHub's 20,000-line API limit."""

@patch('subprocess.run')
@patch('requests.get')
def test_get_pr_diff_406_falls_back_to_local_git_diff(self, mock_get, mock_subprocess):
"""Test that a 406 response triggers local git diff fallback."""
# Mock GitHub API returning 406
mock_response = Mock()
mock_response.status_code = 406
mock_response.text = "Sorry, the diff exceeded the maximum number of lines (20000)"
mock_get.return_value = mock_response

# Mock successful local git diff
mock_subprocess.return_value = Mock(
returncode=0,
stdout="diff --git a/src/main.py b/src/main.py\n+import os\n",
stderr=""
)

with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token', 'REPO_PATH': '/tmp/repo'}):
client = GitHubActionClient()
result = client.get_pr_diff('owner/repo', 123, base_ref='main')

# Should have attempted local git diff
assert mock_subprocess.called
assert 'import os' in result

@patch('subprocess.run')
@patch('requests.get')
def test_get_pr_diff_406_tries_base_ref_first(self, mock_get, mock_subprocess):
"""Test that fallback tries origin/base_ref...HEAD before HEAD~1."""
mock_response = Mock()
mock_response.status_code = 406
mock_get.return_value = mock_response

mock_subprocess.return_value = Mock(
returncode=0,
stdout="diff --git a/file.py b/file.py\n+new code\n",
stderr=""
)

with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token', 'REPO_PATH': '/tmp/repo'}):
client = GitHubActionClient()
client.get_pr_diff('owner/repo', 123, base_ref='main')

# First call should be git diff origin/main...HEAD
first_call_args = mock_subprocess.call_args_list[0]
assert first_call_args[0][0] == ['git', 'diff', 'origin/main...HEAD']

@patch('subprocess.run')
@patch('requests.get')
def test_get_pr_diff_406_falls_back_to_head_parent(self, mock_get, mock_subprocess):
"""Test that if origin/base...HEAD fails, falls back to HEAD~1."""
mock_response = Mock()
mock_response.status_code = 406
mock_get.return_value = mock_response

# First call (origin/main...HEAD) fails, second (HEAD~1) succeeds
mock_subprocess.side_effect = [
Mock(returncode=128, stdout="", stderr="fatal: bad revision"),
Mock(returncode=0, stdout="diff --git a/file.py b/file.py\n+code\n", stderr="")
]

with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token', 'REPO_PATH': '/tmp/repo'}):
client = GitHubActionClient()
result = client.get_pr_diff('owner/repo', 123, base_ref='main')

assert mock_subprocess.call_count == 2
second_call_args = mock_subprocess.call_args_list[1]
assert second_call_args[0][0] == ['git', 'diff', 'HEAD~1']
assert '+code' in result

@patch('subprocess.run')
@patch('requests.get')
def test_get_pr_diff_406_no_base_ref_uses_head_parent(self, mock_get, mock_subprocess):
"""Test fallback without base_ref only tries HEAD~1."""
mock_response = Mock()
mock_response.status_code = 406
mock_get.return_value = mock_response

mock_subprocess.return_value = Mock(
returncode=0,
stdout="diff --git a/file.py b/file.py\n+code\n",
stderr=""
)

with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token', 'REPO_PATH': '/tmp/repo'}):
client = GitHubActionClient()
client.get_pr_diff('owner/repo', 123, base_ref=None)

# Should only try HEAD~1 (no base_ref provided)
assert mock_subprocess.call_count == 1
call_args = mock_subprocess.call_args_list[0]
assert call_args[0][0] == ['git', 'diff', 'HEAD~1']

@patch('subprocess.run')
@patch('requests.get')
def test_get_pr_diff_406_all_fallbacks_fail_raises_error(self, mock_get, mock_subprocess):
"""Test that DiffTooLargeError is raised when all fallbacks fail."""
mock_response = Mock()
mock_response.status_code = 406
mock_get.return_value = mock_response

# Both local git diff attempts fail
mock_subprocess.return_value = Mock(
returncode=128,
stdout="",
stderr="fatal: bad revision 'origin/main...HEAD'"
)

with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token', 'REPO_PATH': '/tmp/repo'}):
client = GitHubActionClient()
with pytest.raises(DiffTooLargeError, match="PR diff exceeds GitHub API limit"):
client.get_pr_diff('owner/repo', 123, base_ref='main')

@patch('subprocess.run')
@patch('requests.get')
def test_get_pr_diff_406_git_diff_timeout_raises_error(self, mock_get, mock_subprocess):
"""Test that git diff timeout is handled gracefully."""
mock_response = Mock()
mock_response.status_code = 406
mock_get.return_value = mock_response

mock_subprocess.side_effect = subprocess.TimeoutExpired(cmd='git diff', timeout=120)

with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token', 'REPO_PATH': '/tmp/repo'}):
client = GitHubActionClient()
with pytest.raises(DiffTooLargeError, match="timed out"):
client.get_pr_diff('owner/repo', 123, base_ref=None)

@patch('requests.get')
def test_get_pr_diff_non_406_error_still_raises(self, mock_get):
"""Test that non-406 HTTP errors still raise normally."""
mock_response = Mock()
mock_response.status_code = 500
mock_response.raise_for_status.side_effect = Exception("Internal Server Error")
mock_get.return_value = mock_response

with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token'}):
client = GitHubActionClient()
with pytest.raises(Exception, match="Internal Server Error"):
client.get_pr_diff('owner/repo', 123)

@patch('subprocess.run')
@patch('requests.get')
def test_get_pr_diff_406_local_diff_filters_generated_files(self, mock_get, mock_subprocess):
"""Test that local git diff output is filtered for generated files."""
mock_response = Mock()
mock_response.status_code = 406
mock_get.return_value = mock_response

diff_with_generated = """diff --git a/src/main.py b/src/main.py
+real code
diff --git a/generated/api.py b/generated/api.py
# @generated by protoc
+generated code
"""
mock_subprocess.return_value = Mock(
returncode=0,
stdout=diff_with_generated,
stderr=""
)

with patch.dict(os.environ, {'GITHUB_TOKEN': 'test-token', 'REPO_PATH': '/tmp/repo'}):
client = GitHubActionClient()
result = client.get_pr_diff('owner/repo', 123, base_ref='main')

# Generated file should be filtered out
assert 'src/main.py' in result
assert 'generated/api.py' not in result
Loading