From 4697c9872840a4c11dcfaa994b506a7be4a46bda Mon Sep 17 00:00:00 2001 From: Augustus Nguyen Date: Tue, 3 Mar 2026 16:30:09 +0700 Subject: [PATCH 1/2] fix: prevent duplicate security review comments on PRs --- .github/workflows/sast.yml | 3 ++ scripts/comment-pr-findings.js | 67 ++++++++++++++++++++++++++++------ 2 files changed, 59 insertions(+), 11 deletions(-) diff --git a/.github/workflows/sast.yml b/.github/workflows/sast.yml index ea7b0b0..2dbd85b 100644 --- a/.github/workflows/sast.yml +++ b/.github/workflows/sast.yml @@ -10,6 +10,9 @@ on: jobs: security-review: runs-on: ubuntu-24.04 + concurrency: + group: security-review-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true permissions: contents: read pull-requests: write diff --git a/scripts/comment-pr-findings.js b/scripts/comment-pr-findings.js index dc92664..bf0fa69 100755 --- a/scripts/comment-pr-findings.js +++ b/scripts/comment-pr-findings.js @@ -53,6 +53,27 @@ function ghApi(endpoint, method = 'GET', data = null) { } } +// Paginated GitHub API helper — fetches all pages for list endpoints +function ghApiAll(baseEndpoint) { + const allItems = []; + let page = 1; + const separator = baseEndpoint.includes('?') ? '&' : '?'; + + while (true) { + const endpoint = `${baseEndpoint}${separator}per_page=100&page=${page}`; + const items = ghApi(endpoint); + + if (!Array.isArray(items) || items.length === 0) break; + + allItems.push(...items); + + if (items.length < 100) break; + page++; + } + + return allItems; +} + // Helper function to add reactions to a comment function addReactionsToComment(commentId, isReviewComment = true) { const reactions = ['+1', '-1']; // thumbs up and thumbs down @@ -173,19 +194,43 @@ async function run() { return; } - // Check for existing review comments to avoid duplicates - const comments = ghApi(`/repos/${context.repo.owner}/${context.repo.repo}/pulls/${context.issue.number}/comments`); - - // Check if we've already commented on these findings - const existingSecurityComments = comments.filter(comment => - comment.user.type === 'Bot' && - comment.body && comment.body.includes('🤖 **Security Issue:') - ); - - if (existingSecurityComments.length > 0) { - console.log(`Found ${existingSecurityComments.length} existing security comments, skipping to avoid duplicates`); + // Fetch all existing PR review comments (paginated) to deduplicate per finding + const allComments = ghApiAll(`/repos/${context.repo.owner}/${context.repo.repo}/pulls/${context.issue.number}/comments`); + + // Build a set of fingerprints from existing security comments (file:line:message) + const existingFingerprints = new Set(); + for (const comment of allComments) { + if (comment.body && comment.body.includes('🤖 **Security Issue:')) { + const path = comment.path || ''; + const line = comment.line || comment.original_line || ''; + // Extract the message from the comment body header line + const match = comment.body.match(/🤖 \*\*Security Issue: (.+?)\*\*/); + const msg = match ? match[1] : ''; + existingFingerprints.add(`${path}:${line}:${msg}`); + } + } + + // Filter out findings that already have a matching comment + const newReviewComments = reviewComments.filter(rc => { + const match = rc.body.match(/🤖 \*\*Security Issue: (.+?)\*\*/); + const msg = match ? match[1] : ''; + const fingerprint = `${rc.path}:${rc.line}:${msg}`; + if (existingFingerprints.has(fingerprint)) { + console.log(`Skipping duplicate finding on ${rc.path}:${rc.line}`); + return false; + } + return true; + }); + + if (newReviewComments.length === 0) { + console.log('All findings already commented, skipping to avoid duplicates'); return; } + + console.log(`Posting ${newReviewComments.length} new finding(s) (${reviewComments.length - newReviewComments.length} duplicate(s) skipped)`); + // Replace the original reviewComments with the deduplicated list + reviewComments.length = 0; + reviewComments.push(...newReviewComments); try { // Create a review with all the comments From bae5381a4a3ee0aaf9330d5332c0550af9c98c3a Mon Sep 17 00:00:00 2001 From: Augustus Nguyen Date: Tue, 3 Mar 2026 16:51:51 +0700 Subject: [PATCH 2/2] feat: add unittest for deduplicating comment --- scripts/comment-pr-findings.bun.test.js | 525 ++++++++++++++++++++++++ 1 file changed, 525 insertions(+) diff --git a/scripts/comment-pr-findings.bun.test.js b/scripts/comment-pr-findings.bun.test.js index f4596dc..e32bb5b 100644 --- a/scripts/comment-pr-findings.bun.test.js +++ b/scripts/comment-pr-findings.bun.test.js @@ -497,6 +497,531 @@ describe('comment-pr-findings.js', () => { }); }); + describe('Per-finding Deduplication', () => { + test('should skip all findings when all are already commented', async () => { + const mockFindings = [{ + file: 'test.py', + line: 10, + description: 'SQL injection detected', + severity: 'HIGH', + category: 'injection' + }]; + + const existingComments = [{ + id: 1, + path: 'test.py', + line: 10, + body: '🤖 **Security Issue: SQL injection detected**\n\n**Severity:** HIGH\n**Category:** injection\n**Tool:** ClaudeCode AI Security Analysis\n', + user: { type: 'Bot' } + }]; + + readFileSyncSpy.mockImplementation((path) => { + if (path.includes('github-event.json')) { + return JSON.stringify({ + pull_request: { number: 123, head: { sha: 'abc123' } } + }); + } + if (path === 'findings.json') { + return JSON.stringify(mockFindings); + } + }); + + spawnSyncSpy.mockImplementation((cmd, args, options) => { + if (cmd === 'gh' && args.includes('api')) { + const endpoint = args[1]; + const method = args[args.indexOf('--method') + 1] || 'GET'; + + if (endpoint.includes('/pulls/123/files')) { + return { status: 0, stdout: JSON.stringify([{ filename: 'test.py' }]), stderr: '' }; + } + if (endpoint.includes('/pulls/123/comments') && method === 'GET') { + return { status: 0, stdout: JSON.stringify(existingComments), stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + }); + + await import('./comment-pr-findings.js'); + + const logCalls = consoleLogSpy.mock.calls.map(c => c[0]); + expect(logCalls).toContain('Skipping duplicate finding on test.py:10'); + expect(logCalls).toContain('All findings already commented, skipping to avoid duplicates'); + }); + + test('should post only new findings when some already exist', async () => { + const mockFindings = [ + { file: 'test.py', line: 10, description: 'SQL injection detected', severity: 'HIGH', category: 'injection' }, + { file: 'app.py', line: 25, description: 'XSS vulnerability found', severity: 'MEDIUM', category: 'xss' } + ]; + + const existingComments = [{ + id: 1, + path: 'test.py', + line: 10, + body: '🤖 **Security Issue: SQL injection detected**\n\n**Severity:** HIGH\n', + user: { type: 'Bot' } + }]; + + readFileSyncSpy.mockImplementation((path) => { + if (path.includes('github-event.json')) { + return JSON.stringify({ + pull_request: { number: 123, head: { sha: 'abc123' } } + }); + } + if (path === 'findings.json') { + return JSON.stringify(mockFindings); + } + }); + + let capturedReviewData; + spawnSyncSpy.mockImplementation((cmd, args, options) => { + if (cmd === 'gh' && args.includes('api')) { + const endpoint = args[1]; + const method = args[args.indexOf('--method') + 1] || 'GET'; + + if (endpoint.includes('/pulls/123/files')) { + return { status: 0, stdout: JSON.stringify([ + { filename: 'test.py' }, + { filename: 'app.py' } + ]), stderr: '' }; + } + if (endpoint.includes('/pulls/123/comments') && method === 'GET') { + return { status: 0, stdout: JSON.stringify(existingComments), stderr: '' }; + } + if (endpoint.includes('/pulls/123/reviews') && method === 'POST') { + if (options && options.input) { + capturedReviewData = JSON.parse(options.input); + } + return { status: 0, stdout: '{}', stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + }); + + await import('./comment-pr-findings.js'); + + const logCalls = consoleLogSpy.mock.calls.map(c => c[0]); + expect(logCalls).toContain('Skipping duplicate finding on test.py:10'); + expect(logCalls).toContain('Posting 1 new finding(s) (1 duplicate(s) skipped)'); + expect(capturedReviewData).toBeDefined(); + expect(capturedReviewData.comments).toHaveLength(1); + expect(capturedReviewData.comments[0].path).toBe('app.py'); + expect(capturedReviewData.comments[0].body).toContain('XSS vulnerability found'); + }); + + test('should not treat non-security comments as duplicates', async () => { + const mockFindings = [{ + file: 'test.py', + line: 10, + description: 'SQL injection detected', + severity: 'HIGH', + category: 'injection' + }]; + + const existingComments = [{ + id: 1, + path: 'test.py', + line: 10, + body: 'Regular review comment - looks good!', + user: { type: 'User' } + }]; + + readFileSyncSpy.mockImplementation((path) => { + if (path.includes('github-event.json')) { + return JSON.stringify({ + pull_request: { number: 123, head: { sha: 'abc123' } } + }); + } + if (path === 'findings.json') { + return JSON.stringify(mockFindings); + } + }); + + let capturedReviewData; + spawnSyncSpy.mockImplementation((cmd, args, options) => { + if (cmd === 'gh' && args.includes('api')) { + const endpoint = args[1]; + const method = args[args.indexOf('--method') + 1] || 'GET'; + + if (endpoint.includes('/pulls/123/files')) { + return { status: 0, stdout: JSON.stringify([{ filename: 'test.py' }]), stderr: '' }; + } + if (endpoint.includes('/pulls/123/comments') && method === 'GET') { + return { status: 0, stdout: JSON.stringify(existingComments), stderr: '' }; + } + if (endpoint.includes('/pulls/123/reviews') && method === 'POST') { + if (options && options.input) { + capturedReviewData = JSON.parse(options.input); + } + return { status: 0, stdout: '{}', stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + }); + + await import('./comment-pr-findings.js'); + + expect(capturedReviewData).toBeDefined(); + expect(capturedReviewData.comments).toHaveLength(1); + const logCalls = consoleLogSpy.mock.calls.map(c => c[0]); + expect(logCalls).toContain('Created review with 1 inline comments'); + }); + + test('should allow same message on different files', async () => { + const mockFindings = [ + { file: 'a.py', line: 5, description: 'Hardcoded secret', severity: 'HIGH', category: 'secret' }, + { file: 'b.py', line: 5, description: 'Hardcoded secret', severity: 'HIGH', category: 'secret' } + ]; + + const existingComments = [{ + id: 1, + path: 'a.py', + line: 5, + body: '🤖 **Security Issue: Hardcoded secret**\n\n**Severity:** HIGH\n', + user: { type: 'Bot' } + }]; + + readFileSyncSpy.mockImplementation((path) => { + if (path.includes('github-event.json')) { + return JSON.stringify({ + pull_request: { number: 123, head: { sha: 'abc123' } } + }); + } + if (path === 'findings.json') { + return JSON.stringify(mockFindings); + } + }); + + let capturedReviewData; + spawnSyncSpy.mockImplementation((cmd, args, options) => { + if (cmd === 'gh' && args.includes('api')) { + const endpoint = args[1]; + const method = args[args.indexOf('--method') + 1] || 'GET'; + + if (endpoint.includes('/pulls/123/files')) { + return { status: 0, stdout: JSON.stringify([ + { filename: 'a.py' }, + { filename: 'b.py' } + ]), stderr: '' }; + } + if (endpoint.includes('/pulls/123/comments') && method === 'GET') { + return { status: 0, stdout: JSON.stringify(existingComments), stderr: '' }; + } + if (endpoint.includes('/pulls/123/reviews') && method === 'POST') { + if (options && options.input) { + capturedReviewData = JSON.parse(options.input); + } + return { status: 0, stdout: '{}', stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + }); + + await import('./comment-pr-findings.js'); + + const logCalls = consoleLogSpy.mock.calls.map(c => c[0]); + expect(logCalls).toContain('Skipping duplicate finding on a.py:5'); + expect(capturedReviewData).toBeDefined(); + expect(capturedReviewData.comments).toHaveLength(1); + expect(capturedReviewData.comments[0].path).toBe('b.py'); + }); + + test('should allow same file and message on different lines', async () => { + const mockFindings = [ + { file: 'test.py', line: 10, description: 'Hardcoded secret', severity: 'HIGH', category: 'secret' }, + { file: 'test.py', line: 50, description: 'Hardcoded secret', severity: 'HIGH', category: 'secret' } + ]; + + const existingComments = [{ + id: 1, + path: 'test.py', + line: 10, + body: '🤖 **Security Issue: Hardcoded secret**\n\n**Severity:** HIGH\n', + user: { type: 'Bot' } + }]; + + readFileSyncSpy.mockImplementation((path) => { + if (path.includes('github-event.json')) { + return JSON.stringify({ + pull_request: { number: 123, head: { sha: 'abc123' } } + }); + } + if (path === 'findings.json') { + return JSON.stringify(mockFindings); + } + }); + + let capturedReviewData; + spawnSyncSpy.mockImplementation((cmd, args, options) => { + if (cmd === 'gh' && args.includes('api')) { + const endpoint = args[1]; + const method = args[args.indexOf('--method') + 1] || 'GET'; + + if (endpoint.includes('/pulls/123/files')) { + return { status: 0, stdout: JSON.stringify([{ filename: 'test.py' }]), stderr: '' }; + } + if (endpoint.includes('/pulls/123/comments') && method === 'GET') { + return { status: 0, stdout: JSON.stringify(existingComments), stderr: '' }; + } + if (endpoint.includes('/pulls/123/reviews') && method === 'POST') { + if (options && options.input) { + capturedReviewData = JSON.parse(options.input); + } + return { status: 0, stdout: '{}', stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + }); + + await import('./comment-pr-findings.js'); + + const logCalls = consoleLogSpy.mock.calls.map(c => c[0]); + expect(logCalls).toContain('Skipping duplicate finding on test.py:10'); + expect(capturedReviewData).toBeDefined(); + expect(capturedReviewData.comments).toHaveLength(1); + expect(capturedReviewData.comments[0].line).toBe(50); + }); + + test('should use original_line for fingerprint when line is missing', async () => { + const mockFindings = [{ + file: 'test.py', + line: 10, + description: 'SQL injection detected', + severity: 'HIGH', + category: 'injection' + }]; + + const existingComments = [{ + id: 1, + path: 'test.py', + original_line: 10, + body: '🤖 **Security Issue: SQL injection detected**\n\n**Severity:** HIGH\n', + user: { type: 'Bot' } + }]; + + readFileSyncSpy.mockImplementation((path) => { + if (path.includes('github-event.json')) { + return JSON.stringify({ + pull_request: { number: 123, head: { sha: 'abc123' } } + }); + } + if (path === 'findings.json') { + return JSON.stringify(mockFindings); + } + }); + + spawnSyncSpy.mockImplementation((cmd, args, options) => { + if (cmd === 'gh' && args.includes('api')) { + const endpoint = args[1]; + const method = args[args.indexOf('--method') + 1] || 'GET'; + + if (endpoint.includes('/pulls/123/files')) { + return { status: 0, stdout: JSON.stringify([{ filename: 'test.py' }]), stderr: '' }; + } + if (endpoint.includes('/pulls/123/comments') && method === 'GET') { + return { status: 0, stdout: JSON.stringify(existingComments), stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + }); + + await import('./comment-pr-findings.js'); + + const logCalls = consoleLogSpy.mock.calls.map(c => c[0]); + expect(logCalls).toContain('Skipping duplicate finding on test.py:10'); + expect(logCalls).toContain('All findings already commented, skipping to avoid duplicates'); + }); + }); + + describe('Paginated Comment Fetching', () => { + test('should fetch multiple pages to find existing security comments', async () => { + const mockFindings = [{ + file: 'test.py', + line: 10, + description: 'SQL injection detected', + severity: 'HIGH', + category: 'injection' + }]; + + const page1Comments = Array.from({ length: 100 }, (_, i) => ({ + id: i + 1, + path: 'other.py', + line: i + 1, + body: `Regular comment ${i + 1}`, + user: { type: 'User' } + })); + + const page2Comments = [{ + id: 200, + path: 'test.py', + line: 10, + body: '🤖 **Security Issue: SQL injection detected**\n\n**Severity:** HIGH\n', + user: { type: 'Bot' } + }]; + + readFileSyncSpy.mockImplementation((path) => { + if (path.includes('github-event.json')) { + return JSON.stringify({ + pull_request: { number: 123, head: { sha: 'abc123' } } + }); + } + if (path === 'findings.json') { + return JSON.stringify(mockFindings); + } + }); + + spawnSyncSpy.mockImplementation((cmd, args, options) => { + if (cmd === 'gh' && args.includes('api')) { + const endpoint = args[1]; + const method = args[args.indexOf('--method') + 1] || 'GET'; + + if (endpoint.includes('/pulls/123/files')) { + return { status: 0, stdout: JSON.stringify([{ filename: 'test.py' }]), stderr: '' }; + } + if (endpoint.includes('/pulls/123/comments') && method === 'GET') { + if (endpoint.includes('&page=1')) { + return { status: 0, stdout: JSON.stringify(page1Comments), stderr: '' }; + } + if (endpoint.includes('&page=2')) { + return { status: 0, stdout: JSON.stringify(page2Comments), stderr: '' }; + } + return { status: 0, stdout: '[]', stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + }); + + await import('./comment-pr-findings.js'); + + const logCalls = consoleLogSpy.mock.calls.map(c => c[0]); + expect(logCalls).toContain('Skipping duplicate finding on test.py:10'); + expect(logCalls).toContain('All findings already commented, skipping to avoid duplicates'); + }); + + test('should stop paginating when a page has fewer than 100 items', async () => { + const mockFindings = [{ + file: 'test.py', + line: 10, + description: 'New finding', + severity: 'HIGH', + category: 'injection' + }]; + + const page1Comments = Array.from({ length: 50 }, (_, i) => ({ + id: i + 1, + path: 'other.py', + line: i + 1, + body: `Regular comment ${i + 1}`, + user: { type: 'User' } + })); + + readFileSyncSpy.mockImplementation((path) => { + if (path.includes('github-event.json')) { + return JSON.stringify({ + pull_request: { number: 123, head: { sha: 'abc123' } } + }); + } + if (path === 'findings.json') { + return JSON.stringify(mockFindings); + } + }); + + const pagesRequested = []; + let capturedReviewData; + spawnSyncSpy.mockImplementation((cmd, args, options) => { + if (cmd === 'gh' && args.includes('api')) { + const endpoint = args[1]; + const method = args[args.indexOf('--method') + 1] || 'GET'; + + if (endpoint.includes('/pulls/123/files')) { + return { status: 0, stdout: JSON.stringify([{ filename: 'test.py' }]), stderr: '' }; + } + if (endpoint.includes('/pulls/123/comments') && method === 'GET') { + const pageMatch = endpoint.match(/&page=(\d+)/); + if (pageMatch) pagesRequested.push(parseInt(pageMatch[1])); + + if (endpoint.includes('&page=1')) { + return { status: 0, stdout: JSON.stringify(page1Comments), stderr: '' }; + } + return { status: 0, stdout: '[]', stderr: '' }; + } + if (endpoint.includes('/pulls/123/reviews') && method === 'POST') { + if (options && options.input) { + capturedReviewData = JSON.parse(options.input); + } + return { status: 0, stdout: '{}', stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + }); + + await import('./comment-pr-findings.js'); + + expect(pagesRequested).toEqual([1]); + expect(capturedReviewData).toBeDefined(); + expect(capturedReviewData.comments).toHaveLength(1); + }); + + test('should handle empty comment list on first page', async () => { + const mockFindings = [{ + file: 'test.py', + line: 10, + description: 'New finding', + severity: 'HIGH', + category: 'injection' + }]; + + readFileSyncSpy.mockImplementation((path) => { + if (path.includes('github-event.json')) { + return JSON.stringify({ + pull_request: { number: 123, head: { sha: 'abc123' } } + }); + } + if (path === 'findings.json') { + return JSON.stringify(mockFindings); + } + }); + + let capturedReviewData; + spawnSyncSpy.mockImplementation((cmd, args, options) => { + if (cmd === 'gh' && args.includes('api')) { + const endpoint = args[1]; + const method = args[args.indexOf('--method') + 1] || 'GET'; + + if (endpoint.includes('/pulls/123/files')) { + return { status: 0, stdout: JSON.stringify([{ filename: 'test.py' }]), stderr: '' }; + } + if (endpoint.includes('/pulls/123/comments') && method === 'GET') { + return { status: 0, stdout: '[]', stderr: '' }; + } + if (endpoint.includes('/pulls/123/reviews') && method === 'POST') { + if (options && options.input) { + capturedReviewData = JSON.parse(options.input); + } + return { status: 0, stdout: '{}', stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + } + return { status: 0, stdout: '{}', stderr: '' }; + }); + + await import('./comment-pr-findings.js'); + + const logCalls = consoleLogSpy.mock.calls.map(c => c[0]); + expect(logCalls).toContain('Posting 1 new finding(s) (0 duplicate(s) skipped)'); + expect(capturedReviewData).toBeDefined(); + expect(capturedReviewData.comments).toHaveLength(1); + }); + }); + describe('Error Handling', () => { test('should handle GitHub API errors gracefully', async () => {