Skip to content

Commit 1630f1f

Browse files
chefsale and cursoragent committed
Cross-review: diff truncate at newline; min-agreement help; webhook env vars; tests
Co-authored-by: Cursor <[email protected]>
1 parent 5deec74 commit 1630f1f

4 files changed

Lines changed: 206 additions & 2 deletions

File tree

src/ai_reviewer/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def cli(verbose: bool) -> None:
6868
"min_agreement",
6969
type=click.FloatRange(0.0, 1.0),
7070
default=2 / 3,
71-
help="Fraction of assessing agents that must mark a finding valid to keep it (default: 2/3)",
71+
help="Fraction of assessing agents that must mark a finding valid (default: 2/3; with 2 agents, both must agree)",
7272
)
7373
def review_pr(
7474
repo: str,

src/ai_reviewer/github/webhook.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,11 @@ async def default_review_handler(repo: str, pr_number: int) -> None:
153153
cursor_config=cursor_config,
154154
github_token=github_token,
155155
num_agents=int(os.environ.get("NUM_AGENTS", "3")),
156+
enable_cross_review=os.environ.get("ENABLE_CROSS_REVIEW", "true").lower()
157+
!= "false",
158+
min_validation_agreement=float(
159+
os.environ.get("MIN_VALIDATION_AGREEMENT", str(2 / 3))
160+
),
156161
)
157162

158163
if review.all_agents_failed:

src/ai_reviewer/review.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,11 @@ def get_cross_review_prompt(
216216
)
217217
findings_text = "\n".join(findings_blob)
218218

219+
# Truncate at newline boundary to avoid cutting mid-line or mid-hunk
220+
diff_excerpt = diff[:_CROSS_REVIEW_DIFF_MAX_CHARS]
221+
if "\n" in diff_excerpt:
222+
diff_excerpt = diff_excerpt.rsplit("\n", 1)[0]
223+
219224
return f"""You are in a **cross-review round**. Multiple agents already produced the findings below for this PR. Your job is to validate them and rank by importance.
220225
221226
## PR
@@ -224,7 +229,7 @@ def get_cross_review_prompt(
224229
225230
## Code diff (excerpt)
226231
```diff
227-
{diff[:_CROSS_REVIEW_DIFF_MAX_CHARS]}
232+
{diff_excerpt}
228233
```
229234
230235
## Findings to validate and rank

tests/test_review.py

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,20 @@
11
"""Tests for the review module, particularly aggregate_findings."""
22

3+
from datetime import datetime
4+
35
import pytest
46

7+
from ai_reviewer.models.context import ReviewContext
8+
from ai_reviewer.models.findings import Category, ConsolidatedFinding, Severity
9+
from ai_reviewer.models.review import ConsolidatedReview
510
from ai_reviewer.review import (
611
_cluster_raw_findings,
712
_detect_pr_type,
813
_raw_findings_similar,
914
aggregate_findings,
15+
apply_cross_review,
16+
get_cross_review_prompt,
17+
parse_cross_review_response,
1018
)
1119

1220

@@ -381,3 +389,189 @@ def test_full_consensus_all_agents_find_same_issue(self):
381389
assert len(result.findings) == 1
382390
assert result.findings[0].consensus_score == pytest.approx(1.0, rel=0.01)
383391
assert len(result.findings[0].agreeing_agents) == 3
392+
393+
394+
def _make_finding(fid: str, severity: Severity = Severity.WARNING) -> ConsolidatedFinding:
    """Build the smallest ConsolidatedFinding that the cross-review tests need."""
    # Gather the fixed fixture values first, then construct in one call.
    fields = {
        "id": fid,
        "file_path": "src/foo.py",
        "line_start": 10,
        "line_end": 12,
        "severity": severity,
        "category": Category.LOGIC,
        "title": "Test finding",
        "description": "Description",
        "suggested_fix": None,
        "consensus_score": 1.0,
        "agreeing_agents": ["agent-1"],
        "confidence": 0.9,
    }
    return ConsolidatedFinding(**fields)
410+
411+
412+
def _make_review(findings: list[ConsolidatedFinding]) -> ConsolidatedReview:
    """Build the smallest ConsolidatedReview that the cross-review tests need."""
    # Fixed fixture values; the findings list is the only varying input.
    base = {
        "id": "review-1",
        "created_at": datetime.now(),
        "repo": "test/repo",
        "pr_number": 1,
        "summary": "Summary",
        "agent_count": 3,
        "review_quality_score": 0.9,
        "total_review_time_ms": 0,
        "failed_agents": [],
    }
    return ConsolidatedReview(findings=findings, **base)
426+
427+
428+
class TestParseCrossReviewResponse:
    """Behavioral tests for parse_cross_review_response."""

    def test_valid_json(self):
        """A bare JSON payload yields its assessments and summary."""
        raw = '{"assessments": [{"id": "finding-1", "valid": true, "rank": 1}], "summary": "OK"}'
        parsed, summary = parse_cross_review_response(raw)
        assert summary == "OK"
        assert len(parsed) == 1
        first = parsed[0]
        assert first["id"] == "finding-1"
        assert first["valid"] is True
        assert first["rank"] == 1

    def test_markdown_fenced_json(self):
        """JSON wrapped in a markdown code fence is still extracted."""
        raw = """Some text
```json
{"assessments": [{"id": "f1", "valid": false, "rank": 2}], "summary": "Done"}
```
"""
        parsed, summary = parse_cross_review_response(raw)
        assert summary == "Done"
        assert len(parsed) == 1
        assert parsed[0]["id"] == "f1"
        assert parsed[0]["valid"] is False

    def test_malformed_input_returns_empty(self):
        """Free text that is not JSON degrades to an empty result."""
        parsed, summary = parse_cross_review_response("not json at all")
        assert (parsed, summary) == ([], "")

    def test_invalid_json_returns_empty(self):
        """Syntactically broken JSON degrades to an empty result."""
        parsed, summary = parse_cross_review_response('{"assessments": [invalid]}')
        assert (parsed, summary) == ([], "")
462+
463+
464+
class TestApplyCrossReview:
    """Behavioral tests for apply_cross_review."""

    def test_no_assessments_returns_unchanged(self):
        """With no votes at all, the review passes through untouched."""
        rev = _make_review([_make_finding("f1")])
        out = apply_cross_review(rev, [])
        assert out.findings == rev.findings
        assert out.summary == rev.summary

    def test_no_votes_for_finding_kept(self):
        """A finding nobody voted on is retained, not treated as rejected."""
        rev = _make_review([_make_finding("f1"), _make_finding("f2")])
        # agent-1 only weighs in on f1, so f2 receives zero votes.
        votes = [
            ("agent-1", [{"id": "f1", "valid": True, "rank": 1}]),
        ]
        out = apply_cross_review(rev, votes, min_validation_agreement=0.5)
        assert len(out.findings) == 2
        kept = {f.id for f in out.findings}
        assert kept == {"f1", "f2"}

    def test_partial_votes_uses_len_votes_not_n_agents(self):
        """The validity ratio is taken over agents that voted, not all agents."""
        rev = _make_review([_make_finding("f1")])
        # One assessing agent voting valid gives a ratio of 1.0 -> kept.
        votes = [("agent-1", [{"id": "f1", "valid": True, "rank": 1}])]
        out = apply_cross_review(rev, votes, min_validation_agreement=0.5)
        assert len(out.findings) == 1
        # One valid + one invalid vote among the two assessors -> ratio 0.5.
        votes = [
            ("agent-1", [{"id": "f1", "valid": True, "rank": 1}]),
            ("agent-2", [{"id": "f1", "valid": False, "rank": 5}]),
        ]
        out = apply_cross_review(rev, votes, min_validation_agreement=0.5)
        assert len(out.findings) == 1
        out = apply_cross_review(rev, votes, min_validation_agreement=0.67)
        assert len(out.findings) == 0

    def test_threshold_drops_below_keeps_at_or_above(self):
        """A finding exactly at the threshold survives; below it is dropped."""
        rev = _make_review([_make_finding("f1")])
        # Two valid out of three assessors -> ratio 2/3.
        votes = [
            ("a1", [{"id": "f1", "valid": True, "rank": 1}]),
            ("a2", [{"id": "f1", "valid": True, "rank": 2}]),
            ("a3", [{"id": "f1", "valid": False, "rank": 3}]),
        ]
        out = apply_cross_review(rev, votes, min_validation_agreement=2 / 3)
        assert len(out.findings) == 1
        out = apply_cross_review(rev, votes, min_validation_agreement=0.9)
        assert len(out.findings) == 0

    def test_reordered_by_avg_rank_then_severity(self):
        """Findings come back sorted by the agents' average rank."""
        warn = _make_finding("f1", Severity.WARNING)
        crit = _make_finding("f2", Severity.CRITICAL)
        sugg = _make_finding("f3", Severity.SUGGESTION)
        rev = _make_review([warn, crit, sugg])
        # Ranks: f1 -> 3, f2 -> 1, f3 -> 2, so expected output order is f2, f3, f1.
        votes = [
            (
                "a1",
                [
                    {"id": "f1", "valid": True, "rank": 3},
                    {"id": "f2", "valid": True, "rank": 1},
                    {"id": "f3", "valid": True, "rank": 2},
                ],
            ),
        ]
        out = apply_cross_review(rev, votes)
        assert [f.id for f in out.findings] == ["f2", "f3", "f1"]

    def test_summary_unchanged_when_nothing_dropped_or_reordered(self):
        """Summary text is left alone when cross-review changes nothing."""
        rev = _make_review([_make_finding("f1")])
        votes = [("a1", [{"id": "f1", "valid": True, "rank": 1}])]
        out = apply_cross_review(rev, votes)
        assert out.summary == rev.summary

    def test_summary_appends_when_dropped(self):
        """Dropping a finding is reported in the summary text."""
        rev = _make_review([_make_finding("f1"), _make_finding("f2")])
        # Both assessors reject f2; with a 1.0 threshold it must be dropped.
        votes = [
            ("a1", [{"id": "f1", "valid": True, "rank": 1}, {"id": "f2", "valid": False, "rank": 2}]),
            ("a2", [{"id": "f1", "valid": True, "rank": 1}, {"id": "f2", "valid": False, "rank": 2}]),
        ]
        out = apply_cross_review(rev, votes, min_validation_agreement=1.0)
        assert len(out.findings) == 1
        assert "1 finding(s) dropped" in out.summary
542+
543+
544+
class TestGetCrossReviewPrompt:
    """Behavioral tests for get_cross_review_prompt."""

    def test_diff_truncated_at_newline(self):
        """The embedded diff excerpt is cut on a line boundary, never mid-line."""
        ctx = ReviewContext(
            repo_name="test/repo",
            pr_number=1,
            pr_title="Title",
            pr_description="",
            base_branch="main",
            head_branch="feature",
            author="dev",
            changed_files_count=1,
            additions=10,
            deletions=2,
        )
        rev = _make_review([_make_finding("finding-1")])
        # A 50-char cap lands inside the second ("b") line of this diff.
        diff = "a" * 30 + "\n" + "b" * 30 + "\nlast"
        import ai_reviewer.review as review_mod

        # Manually patch the module constant and restore it even on failure.
        saved_cap = review_mod._CROSS_REVIEW_DIFF_MAX_CHARS
        review_mod._CROSS_REVIEW_DIFF_MAX_CHARS = 50
        try:
            prompt = get_cross_review_prompt(ctx, rev, diff)
            assert "```diff" in prompt
            excerpt = prompt.split("```diff")[1].split("```")[0].strip()
            # The excerpt ends with the complete "a" line, not a partial "b" line.
            assert excerpt.endswith("a" * 30)
            assert not excerpt.endswith("b")
        finally:
            review_mod._CROSS_REVIEW_DIFF_MAX_CHARS = saved_cap

0 commit comments

Comments (0)