Altinity · MyroTk · Jun 5, 2025 · May 22, 2025 · May 22, 2025 · May 29, 2025
diff --git a/.github/create_workflow_report.py b/.github/create_workflow_report.py
@@ -253,11 +253,17 @@ def get_commit_statuses(sha: str) -> pd.DataFrame:
         for item in all_data
     ]
 
-    return (
-        pd.DataFrame(parsed)
-        .sort_values(by=["job_status", "job_name"], ascending=[True, True])
-        .reset_index(drop=True)
-    )
+    # Create DataFrame
+    df = pd.DataFrame(parsed)
+
+    # Keep only the newest status per job: GitHub returns statuses newest-first,
+    # so the first row seen for each job_name is the most recent one.
+    df = df.drop_duplicates(subset=["job_name"], keep="first")
+
+    # Sort by status and job name
+    return df.sort_values(
+        by=["job_status", "job_name"], ascending=[True, True]
+    ).reset_index(drop=True)
 
 
 def get_pr_info_from_number(pr_number: str) -> dict:
@@ -291,28 +297,50 @@ def get_checks_fails(client: Client, job_url: str):
     Get tests that did not succeed for the given job URL.
     Exclude checks that have status 'error' as they are counted in get_checks_errors.
     """
-    columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link"
-    query = f"""SELECT {columns} FROM `gh-data`.checks
-                WHERE task_url LIKE '{job_url}%'
-                AND test_status IN ('FAIL', 'ERROR')
-                AND check_status!='error'
-                ORDER BY check_name, test_name
-                """
+    query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link
+            FROM (
+                SELECT
+                    argMax(check_status, check_start_time) as job_status,
+                    check_name as job_name,
+                    argMax(test_status, check_start_time) as status,
+                    test_name,
+                    report_url as results_link,
+                    task_url
+                FROM `gh-data`.checks
+                GROUP BY check_name, test_name, report_url, task_url
+            )
+            WHERE task_url LIKE '{job_url}%'
+            AND test_status IN ('FAIL', 'ERROR')
+            AND job_status!='error'
+            ORDER BY job_name, test_name
+            """
     return client.query_dataframe(query)
 
 
 def get_checks_known_fails(client: Client, job_url: str, known_fails: dict):
     """
     Get tests that are known to fail for the given job URL.
     """
-    assert len(known_fails) > 0, "cannot query the database with empty known fails"
-    columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link"
-    query = f"""SELECT {columns} FROM `gh-data`.checks
-                WHERE task_url LIKE '{job_url}%'
-                AND test_status='BROKEN'
-                AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())})
-                ORDER BY test_name, check_name
-                """
+    if len(known_fails) == 0:
+        return pd.DataFrame()
+
+    query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link
+        FROM (
+            SELECT
+                argMax(check_status, check_start_time) as job_status,
+                check_name as job_name,
+                argMax(test_status, check_start_time) as status,
+                test_name,
+                report_url as results_link,
+                task_url
+            FROM `gh-data`.checks
+            GROUP BY check_name, test_name, report_url, task_url
+        )
+        WHERE task_url LIKE '{job_url}%'
+        AND test_status='BROKEN'
+        AND test_name IN ({','.join(f"'{test}'" for test in known_fails.keys())})
+        ORDER BY job_name, test_name
+        """
 
     df = client.query_dataframe(query)
 
@@ -333,12 +361,22 @@ def get_checks_errors(client: Client, job_url: str):
     """
     Get checks that have status 'error' for the given job URL.
     """
-    columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link"
-    query = f"""SELECT {columns} FROM `gh-data`.checks
-                WHERE task_url LIKE '{job_url}%'
-                AND check_status=='error'
-                ORDER BY check_name, test_name
-                """
+    query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link
+            FROM (
+                SELECT
+                    argMax(check_status, check_start_time) as job_status,
+                    check_name as job_name,
+                    argMax(test_status, check_start_time) as status,
+                    test_name,
+                    report_url as results_link,
+                    task_url
+                FROM `gh-data`.checks
+                GROUP BY check_name, test_name, report_url, task_url
+            )
+            WHERE task_url LIKE '{job_url}%'
+            AND job_status=='error'
+            ORDER BY job_name, test_name
+            """
     return client.query_dataframe(query)
 
 

diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml
@@ -649,12 +649,13 @@ jobs:
           ${{ toJson(needs) }}
           EOF
           python3 ./tests/ci/ci_buddy.py --check-wf-status
-      - name: Create and upload combined report
+      - name: Create and upload report
         if: ${{ !cancelled() }}
         env:
           CHECKS_DATABASE_HOST: ${{ secrets.CHECKS_DATABASE_HOST }}
           CHECKS_DATABASE_USER: ${{ secrets.CLICKHOUSE_TEST_STAT_LOGIN }}
           CHECKS_DATABASE_PASSWORD: ${{ secrets.CLICKHOUSE_TEST_STAT_PASSWORD }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           COMMIT_SHA: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
           PR_NUMBER: ${{ github.event.pull_request.number || 0 }}
           ACTIONS_RUN_URL: ${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }}
@@ -664,6 +665,8 @@ jobs:
 
           REPORT_LINK=$(python3 .github/create_workflow_report.py --pr-number $PR_NUMBER --commit-sha $COMMIT_SHA --actions-run-url $ACTIONS_RUN_URL --known-fails tests/broken_tests.json --cves)
 
+          echo $REPORT_LINK
+
           IS_VALID_URL=$(echo $REPORT_LINK | grep -E '^https?://')
           if [[ -n $IS_VALID_URL ]]; then
             echo "Combined CI Report: [View Report]($REPORT_LINK)" >> $GITHUB_STEP_SUMMARY