|
1 | 1 | """Tests for the review module, particularly aggregate_findings.""" |
2 | 2 |
|
| 3 | +from datetime import datetime |
| 4 | + |
3 | 5 | import pytest |
4 | 6 |
|
| 7 | +from ai_reviewer.models.context import ReviewContext |
| 8 | +from ai_reviewer.models.findings import Category, ConsolidatedFinding, Severity |
| 9 | +from ai_reviewer.models.review import ConsolidatedReview |
5 | 10 | from ai_reviewer.review import ( |
6 | 11 | _cluster_raw_findings, |
7 | 12 | _detect_pr_type, |
8 | 13 | _raw_findings_similar, |
9 | 14 | aggregate_findings, |
| 15 | + apply_cross_review, |
| 16 | + get_cross_review_prompt, |
| 17 | + parse_cross_review_response, |
10 | 18 | ) |
11 | 19 |
|
12 | 20 |
|
@@ -381,3 +389,189 @@ def test_full_consensus_all_agents_find_same_issue(self): |
381 | 389 | assert len(result.findings) == 1 |
382 | 390 | assert result.findings[0].consensus_score == pytest.approx(1.0, rel=0.01) |
383 | 391 | assert len(result.findings[0].agreeing_agents) == 3 |
| 392 | + |
| 393 | + |
def _make_finding(fid: str, severity: Severity = Severity.WARNING) -> ConsolidatedFinding:
    """Build a minimal ConsolidatedFinding fixture for the cross-review tests.

    Only ``id`` and ``severity`` vary per test; every other field is a fixed
    placeholder value.
    """
    attrs = dict(
        id=fid,
        file_path="src/foo.py",
        line_start=10,
        line_end=12,
        severity=severity,
        category=Category.LOGIC,
        title="Test finding",
        description="Description",
        suggested_fix=None,
        consensus_score=1.0,
        agreeing_agents=["agent-1"],
        confidence=0.9,
    )
    return ConsolidatedFinding(**attrs)
| 410 | + |
| 411 | + |
def _make_review(findings: list[ConsolidatedFinding]) -> ConsolidatedReview:
    """Build a minimal ConsolidatedReview wrapping *findings* for the cross-review tests."""
    attrs = dict(
        id="review-1",
        created_at=datetime.now(),
        repo="test/repo",
        pr_number=1,
        findings=findings,
        summary="Summary",
        agent_count=3,
        review_quality_score=0.9,
        total_review_time_ms=0,
        failed_agents=[],
    )
    return ConsolidatedReview(**attrs)
| 426 | + |
| 427 | + |
class TestParseCrossReviewResponse:
    """Tests for parse_cross_review_response."""

    def test_valid_json(self):
        """A bare JSON payload is parsed into (assessments, summary)."""
        raw = '{"assessments": [{"id": "finding-1", "valid": true, "rank": 1}], "summary": "OK"}'
        assessments, summary = parse_cross_review_response(raw)
        assert len(assessments) == 1
        first = assessments[0]
        assert first["id"] == "finding-1"
        assert first["valid"] is True
        assert first["rank"] == 1
        assert summary == "OK"

    def test_markdown_fenced_json(self):
        """JSON wrapped in a markdown code fence is still extracted."""
        raw = """Some text
```json
{"assessments": [{"id": "f1", "valid": false, "rank": 2}], "summary": "Done"}
```
"""
        assessments, summary = parse_cross_review_response(raw)
        assert len(assessments) == 1
        assert assessments[0]["id"] == "f1"
        assert assessments[0]["valid"] is False
        assert summary == "Done"

    def test_malformed_input_returns_empty(self):
        """Non-JSON input degrades to an empty result, not an exception."""
        parsed = parse_cross_review_response("not json at all")
        assert parsed == ([], "")

    def test_invalid_json_returns_empty(self):
        """Syntactically broken JSON degrades to an empty result."""
        parsed = parse_cross_review_response('{"assessments": [invalid]}')
        assert parsed == ([], "")
| 462 | + |
| 463 | + |
class TestApplyCrossReview:
    """Tests for apply_cross_review."""

    def test_no_assessments_returns_unchanged(self):
        """With no assessments at all, findings and summary pass through untouched."""
        original = _make_review([_make_finding("f1")])
        updated = apply_cross_review(original, [])
        assert updated.findings == original.findings
        assert updated.summary == original.summary

    def test_no_votes_for_finding_kept(self):
        """Findings with zero votes are kept (not counted as rejected)."""
        original = _make_review([_make_finding("f1"), _make_finding("f2")])
        # Only one agent votes, and only on f1; f2 receives no assessments.
        votes = [
            ("agent-1", [{"id": "f1", "valid": True, "rank": 1}]),
        ]
        updated = apply_cross_review(original, votes, min_validation_agreement=0.5)
        assert len(updated.findings) == 2
        surviving = {f.id for f in updated.findings}
        assert "f1" in surviving
        assert "f2" in surviving

    def test_partial_votes_uses_len_votes_not_n_agents(self):
        """Valid ratio is computed over assessing agents, not total agents."""
        original = _make_review([_make_finding("f1")])
        # One valid vote from the single assessing agent -> ratio 1.0, kept.
        votes = [("agent-1", [{"id": "f1", "valid": True, "rank": 1}])]
        updated = apply_cross_review(original, votes, min_validation_agreement=0.5)
        assert len(updated.findings) == 1
        # One valid + one invalid from two assessing agents -> ratio 0.5.
        votes = [
            ("agent-1", [{"id": "f1", "valid": True, "rank": 1}]),
            ("agent-2", [{"id": "f1", "valid": False, "rank": 5}]),
        ]
        updated = apply_cross_review(original, votes, min_validation_agreement=0.5)
        assert len(updated.findings) == 1
        updated = apply_cross_review(original, votes, min_validation_agreement=0.67)
        assert len(updated.findings) == 0

    def test_threshold_drops_below_keeps_at_or_above(self):
        """A finding survives exactly when its valid ratio meets the threshold."""
        original = _make_review([_make_finding("f1")])
        # Two of three agents judge f1 valid.
        votes = [
            ("a1", [{"id": "f1", "valid": True, "rank": 1}]),
            ("a2", [{"id": "f1", "valid": True, "rank": 2}]),
            ("a3", [{"id": "f1", "valid": False, "rank": 3}]),
        ]
        updated = apply_cross_review(original, votes, min_validation_agreement=2 / 3)
        assert len(updated.findings) == 1
        updated = apply_cross_review(original, votes, min_validation_agreement=0.9)
        assert len(updated.findings) == 0

    def test_reordered_by_avg_rank_then_severity(self):
        """Surviving findings come back sorted by average assessed rank."""
        warn = _make_finding("f1", Severity.WARNING)
        crit = _make_finding("f2", Severity.CRITICAL)
        sugg = _make_finding("f3", Severity.SUGGESTION)
        original = _make_review([warn, crit, sugg])
        # Ranks: f1 -> 3, f2 -> 1, f3 -> 2, so expected order is f2, f3, f1.
        votes = [
            ("a1", [{"id": "f1", "valid": True, "rank": 3}, {"id": "f2", "valid": True, "rank": 1}, {"id": "f3", "valid": True, "rank": 2}]),
        ]
        updated = apply_cross_review(original, votes)
        assert [x.id for x in updated.findings] == ["f2", "f3", "f1"]

    def test_summary_unchanged_when_nothing_dropped_or_reordered(self):
        """A fully-validated single finding leaves the summary as-is."""
        original = _make_review([_make_finding("f1")])
        votes = [("a1", [{"id": "f1", "valid": True, "rank": 1}])]
        updated = apply_cross_review(original, votes)
        assert updated.summary == original.summary

    def test_summary_appends_when_dropped(self):
        """Dropping a finding appends a drop notice to the summary."""
        original = _make_review([_make_finding("f1"), _make_finding("f2")])
        votes = [
            ("a1", [{"id": "f1", "valid": True, "rank": 1}, {"id": "f2", "valid": False, "rank": 2}]),
            ("a2", [{"id": "f1", "valid": True, "rank": 1}, {"id": "f2", "valid": False, "rank": 2}]),
        ]
        updated = apply_cross_review(original, votes, min_validation_agreement=1.0)
        assert len(updated.findings) == 1
        assert "1 finding(s) dropped" in updated.summary
| 542 | + |
| 543 | + |
class TestGetCrossReviewPrompt:
    """Tests for get_cross_review_prompt."""

    def test_diff_truncated_at_newline(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """The diff excerpt is cut at a newline boundary, never mid-line.

        Uses pytest's ``monkeypatch`` fixture instead of a manual
        save/try/finally dance so the module constant is guaranteed to be
        restored even if the test errors before the assertions.
        """
        import ai_reviewer.review as review_mod

        context = ReviewContext(
            repo_name="test/repo",
            pr_number=1,
            pr_title="Title",
            pr_description="",
            base_branch="main",
            head_branch="feature",
            author="dev",
            changed_files_count=1,
            additions=10,
            deletions=2,
        )
        review = _make_review([_make_finding("finding-1")])
        # A naive 50-char cut would land in the middle of the "b" line.
        line = "a" * 30 + "\n" + "b" * 30
        diff = line + "\nlast"
        # Shrink the limit so truncation actually triggers on this tiny diff;
        # monkeypatch undoes this automatically at teardown.
        monkeypatch.setattr(review_mod, "_CROSS_REVIEW_DIFF_MAX_CHARS", 50)
        prompt = get_cross_review_prompt(context, review, diff)
        # Excerpt should end at the newline after the "a" line, not mid "b".
        assert "```diff" in prompt
        excerpt = prompt.split("```diff")[1].split("```")[0].strip()
        assert excerpt.endswith("a" * 30)
        assert not excerpt.endswith("b")
0 commit comments