From 5e8f6c6a1f7fa51c263e9e3883847df0088261cc Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Thu, 11 Dec 2025 10:06:36 -0600 Subject: [PATCH 01/16] Revert "Pin scikit-learn<1.8 in test dependencies (#7588)" This reverts commit 0b41aeb18390fb6e66e0e22acf6820a9c311b2fa. --- conda/environments/all_cuda-129_arch-aarch64.yaml | 1 - conda/environments/all_cuda-129_arch-x86_64.yaml | 1 - conda/environments/all_cuda-130_arch-aarch64.yaml | 1 - conda/environments/all_cuda-130_arch-x86_64.yaml | 1 - dependencies.yaml | 2 +- python/cuml/pyproject.toml | 2 +- 6 files changed, 2 insertions(+), 6 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 96adb871d2..140479f536 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -71,7 +71,6 @@ dependencies: - rmm==26.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn>=1.4 -- scikit-learn>=1.4,<1.8.0 - scipy>=1.11.0 - seaborn - sphinx diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 7839dc67b7..5f8e8903a6 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -71,7 +71,6 @@ dependencies: - rmm==26.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn>=1.4 -- scikit-learn>=1.4,<1.8.0 - scipy>=1.11.0 - seaborn - sphinx diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index 494da2e884..449b5ec714 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -71,7 +71,6 @@ dependencies: - rmm==26.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn>=1.4 -- scikit-learn>=1.4,<1.8.0 - scipy>=1.11.0 - seaborn - sphinx diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml 
b/conda/environments/all_cuda-130_arch-x86_64.yaml index 3e8908c335..6b2b6754d1 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -71,7 +71,6 @@ dependencies: - rmm==26.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn>=1.4 -- scikit-learn>=1.4,<1.8.0 - scipy>=1.11.0 - seaborn - sphinx diff --git a/dependencies.yaml b/dependencies.yaml index f77919dd8d..dc71360a08 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -486,7 +486,7 @@ dependencies: - pytest-cov - pytest-xdist - seaborn - - scikit-learn>=1.4,<1.8.0 + - *scikit_learn - statsmodels - tenacity - umap-learn==0.5.7 diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml index 69e930c553..77569d1b6a 100644 --- a/python/cuml/pyproject.toml +++ b/python/cuml/pyproject.toml @@ -120,7 +120,7 @@ test = [ "pytest-xdist", "pytest<9.0", "pyyaml", - "scikit-learn>=1.4,<1.8.0", + "scikit-learn>=1.4", "seaborn", "statsmodels", "tenacity", From 2a35495ed069779db7a118bab4b6bfb30764f6b7 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 09:13:05 -0600 Subject: [PATCH 02/16] Skip HDBSCAN tests with scikit-learn 1.8. --- python/cuml/cuml_accel_tests/integration/test_hdbscan.py | 3 +++ python/cuml/tests/test_hdbscan.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/python/cuml/cuml_accel_tests/integration/test_hdbscan.py b/python/cuml/cuml_accel_tests/integration/test_hdbscan.py index 68b80205a3..c7c01efd81 100644 --- a/python/cuml/cuml_accel_tests/integration/test_hdbscan.py +++ b/python/cuml/cuml_accel_tests/integration/test_hdbscan.py @@ -13,6 +13,9 @@ from sklearn.preprocessing import StandardScaler if Version(sklearn.__version__) >= Version("1.8.0.dev0"): + # NOTE: Remove this skip when issue + # https://github.com/scikit-learn-contrib/hdbscan/issues/689 is resolved, + # as it blocks compatibility with scikit-learn >= 1.8.0.dev0. 
pytest.skip( "hdbscan requires sklearn < 1.8.0.dev0", allow_module_level=True ) diff --git a/python/cuml/tests/test_hdbscan.py b/python/cuml/tests/test_hdbscan.py index c6d876c8ac..ac3bfb8821 100644 --- a/python/cuml/tests/test_hdbscan.py +++ b/python/cuml/tests/test_hdbscan.py @@ -6,6 +6,8 @@ import numpy as np import pandas as pd import pytest +import sklearn +from packaging.version import Version from pylibraft.common import DeviceResourcesSNMG from sklearn import datasets from sklearn.datasets import make_blobs @@ -23,6 +25,11 @@ from cuml.testing.datasets import make_pattern from cuml.testing.utils import array_equal +if Version(sklearn.__version__) >= Version("1.8.0.dev0"): + pytest.skip( + "hdbscan requires sklearn < 1.8.0.dev0", allow_module_level=True + ) + dataset_names = ["noisy_circles", "noisy_moons", "varied"] From e61e4a58f40086798fd5735f9f0be6729ff92e9c Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 09:18:56 -0600 Subject: [PATCH 03/16] Make test_logistic_regression_model_default test compatible with scikit-learn 1.8. --- python/cuml/tests/test_linear_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/tests/test_linear_model.py b/python/cuml/tests/test_linear_model.py index e4e243dedf..34a15d47d4 100644 --- a/python/cuml/tests/test_linear_model.py +++ b/python/cuml/tests/test_linear_model.py @@ -417,7 +417,7 @@ def test_logistic_regression_model_default(dtype): y_test = y_test.astype(dtype) culog = cuLog() culog.fit(X_train, y_train) - sklog = skLog(multi_class="auto") + sklog = skLog() sklog.fit(X_train, y_train) From 477c2e9e07ad69c79ae962fdd0a448876afaf34d Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 09:31:59 -0600 Subject: [PATCH 04/16] Make test_device_selection test compatible with sklearn 1.8. 
--- python/cuml/tests/test_fil.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/cuml/tests/test_fil.py b/python/cuml/tests/test_fil.py index 31997b52df..63c991702b 100644 --- a/python/cuml/tests/test_fil.py +++ b/python/cuml/tests/test_fil.py @@ -10,7 +10,9 @@ import numpy as np import pandas as pd import pytest +import sklearn import treelite +from packaging.version import Version # Import XGBoost before scikit-learn to work around a libgomp bug # See https://github.com/dmlc/xgboost/issues/7110 @@ -910,6 +912,9 @@ def test_device_selection(device_id, model_kind, tmp_path): ) xgb_model.fit(X, y) model_path = os.path.join(tmp_path, "xgb_class.ubj") + # Skip when sklearn >= 1.8.0.dev0 (xgboost is incompatible) + if Version(sklearn.__version__) >= Version("1.8.0.dev0"): + pytest.skip("xgboost is incompatible with sklearn >= 1.8.0.dev0") xgb_model.save_model(model_path) fm = ForestInference.load( model_path, From 9986457955b18694e389e64ff18c5ba07f99e358 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 09:37:15 -0600 Subject: [PATCH 05/16] Skip UMAP-learn tests with scikit-learn 1.8. The umap-learn package is incompatible.
--- python/cuml/tests/test_umap.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/cuml/tests/test_umap.py b/python/cuml/tests/test_umap.py index a66c48dc34..4e80387de6 100644 --- a/python/cuml/tests/test_umap.py +++ b/python/cuml/tests/test_umap.py @@ -9,7 +9,9 @@ import numpy as np import pytest import scipy.sparse as scipy_sparse +import sklearn import umap +from packaging.version import Version from pylibraft.common import DeviceResourcesSNMG from sklearn import datasets from sklearn.cluster import KMeans @@ -29,6 +31,9 @@ unit_param, ) +if Version(sklearn.__version__) >= Version("1.8.0.dev0"): + pytest.skip("umap requires sklearn < 1.8.0.dev0", allow_module_level=True) + dataset_names = ["iris", "digits", "wine", "blobs"] From a4c26fb0c87b5da7275ba6d1ac397373313bcd3e Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 09:41:39 -0600 Subject: [PATCH 06/16] Conditionally skip test_trustworthiness due to umap incompatibility. --- python/cuml/tests/test_trustworthiness.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/cuml/tests/test_trustworthiness.py b/python/cuml/tests/test_trustworthiness.py index e684f26442..668a9375df 100644 --- a/python/cuml/tests/test_trustworthiness.py +++ b/python/cuml/tests/test_trustworthiness.py @@ -4,6 +4,8 @@ import cudf import numpy as np import pytest +import sklearn +from packaging.version import Version from sklearn.datasets import make_blobs from sklearn.manifold import trustworthiness as sklearn_trustworthiness from umap import UMAP @@ -19,6 +21,10 @@ @pytest.mark.filterwarnings( "ignore:n_jobs value.*overridden.*by setting random_state.*:UserWarning" ) +@pytest.mark.xfail( + condition=Version(sklearn.__version__) >= Version("1.8.0.dev0"), + reason="umap-learn is incompatible with sklearn >= 1.8.0", +) def test_trustworthiness( input_type, n_samples, n_features, n_components, batch_size ): From 64bdb197cda3bf283c538b8788f375cf7d2ad514 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: 
Wed, 10 Dec 2025 09:59:08 -0600 Subject: [PATCH 07/16] Make test_logistic_regression compatible with scikit-learn 1.8. --- python/cuml/tests/test_linear_model.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/python/cuml/tests/test_linear_model.py b/python/cuml/tests/test_linear_model.py index 34a15d47d4..062d546675 100644 --- a/python/cuml/tests/test_linear_model.py +++ b/python/cuml/tests/test_linear_model.py @@ -329,7 +329,6 @@ def test_logistic_regression( solver="saga", C=C, fit_intercept=fit_intercept, - multi_class="auto", ) else: sklog = skLog( @@ -337,7 +336,6 @@ def test_logistic_regression( solver="saga", C=C, fit_intercept=fit_intercept, - multi_class="auto", ) else: sklog = skLog( @@ -345,7 +343,6 @@ def test_logistic_regression( solver="lbfgs", C=C, fit_intercept=fit_intercept, - multi_class="auto", ) sklog.fit(X_train, y_train) @@ -586,11 +583,7 @@ def test_logistic_regression_predict_proba( sklog = skLog( fit_intercept=fit_intercept, - **( - {"solver": "lbfgs", "multi_class": "multinomial"} - if num_classes > 2 - else {} - ), + **({"solver": "lbfgs"} if num_classes > 2 else {}), ) sklog.coef_ = culog.coef_ sklog.intercept_ = culog.intercept_ if fit_intercept else 0 From 3b954a1f142ec057c10e925911ff609917e95196 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 10:46:42 -0600 Subject: [PATCH 08/16] Run the cuml-accel upstream tests on oldest and intermediate deps. UMAP-learn is not yet compatible with scikit-learn 1.8. 
--- .github/workflows/test.yaml | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f84f0fa456..83177fb695 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -95,8 +95,19 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: "ci/test_python_cuml_accel_upstream.sh" - # Select amd64 and one job per major CUDA version with the latest CUDA and Python versions - matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([.CUDA_VER,.PY_VER]|map(split(".")|map(tonumber)))) + # Select amd64 with oldest deps, plus an "intermediate" entry based on oldest + matrix_filter: '( + map(select(.ARCH == "amd64")) + | map(select(.DEPENDENCIES == "oldest")) + | sort_by(.PY_VER) + ) + + + ( + map(select(.ARCH == "amd64")) + | map(select(.DEPENDENCIES == "oldest")) + | sort_by(.PY_VER) + | map(.DEPENDENCIES = "intermediate") + )' sccache-dist-token-secret-name: GIST_REPO_READ_ORG_GITHUB_TOKEN wheel-tests-cuml: secrets: inherit From 83e42877ef00195ac962bc1fdbdc09b1332b1a86 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 12:44:43 -0600 Subject: [PATCH 09/16] Revert later: Expand xfail-manager to support "set" function. 
--- .../upstream/xfail_manager.py | 252 ++++++++++++++++++ 1 file changed, 252 insertions(+) diff --git a/python/cuml/cuml_accel_tests/upstream/xfail_manager.py b/python/cuml/cuml_accel_tests/upstream/xfail_manager.py index 15e0bb000b..d98d85e9de 100755 --- a/python/cuml/cuml_accel_tests/upstream/xfail_manager.py +++ b/python/cuml/cuml_accel_tests/upstream/xfail_manager.py @@ -14,9 +14,11 @@ - Deterministic formatting and sorting of xfail lists - Validation of group conditions - Cleanup of empty groups +- Batch modification of test metadata CLI Commands: - format: Apply consistent formatting and sorting +- set: Modify metadata (reason, condition, marker, strict) for specified tests The tool ensures xfail lists remain maintainable and produce clean diffs in version control systems. @@ -171,6 +173,135 @@ def __init__(self, xfail_list_path: Optional[Union[str, Path]] = None): if xfail_list_path: self.load(xfail_list_path) + def find_test(self, test_id: str) -> Optional[XfailGroup]: + """Find the group containing a specific test. + + Args: + test_id: The test ID to search for + + Returns: + The XfailGroup containing the test, or None if not found + """ + for group in self.groups: + if test_id in group.tests: + return group + return None + + def remove_test(self, test_id: str) -> bool: + """Remove a test from its current group. + + Args: + test_id: The test ID to remove + + Returns: + True if the test was found and removed, False otherwise + """ + for group in self.groups: + if test_id in group.tests: + group.tests.remove(test_id) + return True + return False + + def set_test_metadata( + self, + test_ids: List[str], + reason: Optional[str] = None, + condition: Optional[str] = None, + marker: Optional[str] = None, + strict: Optional[bool] = None, + run: Optional[bool] = None, + ) -> Dict[str, Any]: + """Set metadata for specified tests, moving them to appropriate groups. + + For each test, this method: + 1. 
Finds the test's current group (if any) to get default metadata + 2. Overrides only the metadata options that were explicitly provided + 3. Removes the test from its original group + 4. Adds the test to a group with the new metadata + + Args: + test_ids: List of test IDs to modify + reason: New reason (if provided) + condition: New condition (if provided) + marker: New marker (if provided) + strict: New strict value (if provided) + run: New run value (if provided) + + Returns: + Dictionary with 'moved', 'added', and 'not_found' lists + """ + results = {"moved": [], "added": [], "not_found": []} + + for test_id in test_ids: + test_id = QuoteTestID(test_id) + + # Find current group for this test + current_group = self.find_test(test_id) + + if current_group: + # Get defaults from current group + new_reason = ( + reason if reason is not None else current_group.reason + ) + new_condition = ( + condition + if condition is not None + else current_group.condition + ) + new_marker = ( + marker if marker is not None else current_group.marker + ) + new_strict = ( + strict if strict is not None else current_group.strict + ) + new_run = run if run is not None else current_group.run + + # Remove from current group + self.remove_test(test_id) + results["moved"].append(test_id) + else: + # Test not found - use provided values or defaults + if reason is None: + results["not_found"].append(test_id) + continue + + new_reason = reason + new_condition = condition + new_marker = marker + new_strict = strict if strict is not None else True + new_run = run if run is not None else True + results["added"].append(test_id) + + # Find or create a group with matching metadata + target_group = None + for group in self.groups: + if ( + group.reason == new_reason + and group.condition == new_condition + and group.marker == new_marker + and group.strict == new_strict + and group.run == new_run + ): + target_group = group + break + + if target_group is None: + # Create new group + target_group = 
XfailGroup( + reason=new_reason, + tests=[], + strict=new_strict, + condition=new_condition, + run=new_run, + marker=new_marker, + ) + self.groups.append(target_group) + + # Add test to target group + target_group.tests.append(test_id) + + return results + def load(self, xfail_list_path: Union[str, Path]) -> None: """Load xfail list from YAML file.""" path = Path(xfail_list_path) @@ -358,6 +489,86 @@ def _format_single_file(xfail_path, args): return 1 +def cmd_set(args): + """Set metadata for specified tests in the xfail list.""" + xfail_path = Path(args.xfail_list) + + if not xfail_path.exists(): + print( + f"Error: Xfail list file not found: {xfail_path}", file=sys.stderr + ) + return 1 + + # Validate that at least one metadata option is provided + has_metadata = any( + [ + args.reason is not None, + args.condition is not None, + args.marker is not None, + args.strict is not None, + ] + ) + + if not has_metadata: + print( + "Error: At least one of --reason, --condition, --marker, " + "or --strict must be provided", + file=sys.stderr, + ) + return 1 + + try: + manager = XfailManager(xfail_path) + + results = manager.set_test_metadata( + test_ids=args.test_ids, + reason=args.reason, + condition=args.condition, + marker=args.marker, + strict=args.strict, + ) + + # Report results + if results["moved"]: + print(f"Moved {len(results['moved'])} test(s) to new group:") + for test_id in results["moved"]: + print(f" {test_id}") + + if results["added"]: + print(f"Added {len(results['added'])} new test(s):") + for test_id in results["added"]: + print(f" {test_id}") + + if results["not_found"]: + print( + f"Warning: {len(results['not_found'])} test(s) not found " + "(--reason required to add new tests):", + file=sys.stderr, + ) + for test_id in results["not_found"]: + print(f" {test_id}", file=sys.stderr) + + # Clean up empty groups + manager.cleanup_empty_groups() + + # Validate and save + validation_errors = manager.validate_conditions() + if validation_errors: + 
print("Validation errors:", file=sys.stderr) + for error in validation_errors: + print(f" {error}", file=sys.stderr) + return 1 + + manager.save(xfail_path) + print(f"Updated {xfail_path}") + + return 0 if not results["not_found"] else 1 + + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + return 1 + + def main(): """Main CLI entry point.""" parser = argparse.ArgumentParser( @@ -383,6 +594,47 @@ def main(): ) format_parser.set_defaults(func=cmd_format) + # Set command + set_parser = subparsers.add_parser( + "set", + help="Set metadata for specified tests in the xfail list", + ) + set_parser.add_argument( + "xfail_list", + help="Xfail list file to modify", + ) + set_parser.add_argument( + "test_ids", + nargs="+", + metavar="TEST_ID", + help="Test IDs to modify", + ) + set_parser.add_argument( + "--reason", + help="Set the reason for the xfail group", + ) + set_parser.add_argument( + "--condition", + help="Set the condition for the xfail group (e.g., 'scikit-learn<1.8')", + ) + set_parser.add_argument( + "--marker", + help="Set the pytest marker for the xfail group", + ) + set_parser.add_argument( + "--strict", + action="store_true", + default=None, + help="Set strict=true for the xfail group", + ) + set_parser.add_argument( + "--no-strict", + action="store_false", + dest="strict", + help="Set strict=false for the xfail group", + ) + set_parser.set_defaults(func=cmd_set) + args = parser.parse_args() if not args.command: From e407f7ec2609aae7baa92aaf80cc38614ffb256d Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 12:51:50 -0600 Subject: [PATCH 10/16] Update scikit-learn xfail list for scikit-learn 1.8. 
--- .../upstream/scikit-learn/xfail-list.yaml | 344 ++++++++++++++---- 1 file changed, 283 insertions(+), 61 deletions(-) diff --git a/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml b/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml index 421305135c..1629121a09 100644 --- a/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml +++ b/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml @@ -1,3 +1,44 @@ +- reason: AUC standard deviation differs slightly with cuml.accel in sklearn 1.8 + marker: cuml_accel_bugs + condition: scikit-learn>=1.8 + tests: + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-False-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-False-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-True-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-True-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-False-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-False-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-True-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-True-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-False-None]" + - 
"sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-False-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-True-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-True-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-False-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-False-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-True-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-True-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[None-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[None-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[single-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[single-curve_kwargs1]" +- reason: Search CV sample weight equivalence differs with cuml.accel in sklearn 1.8 + marker: cuml_accel_bugs + condition: scikit-learn>=1.8 + tests: + - "sklearn.model_selection.tests.test_search::test_search_cv_sample_weight_equivalence[estimator0]" +- reason: Test should fail with cuml.accel + marker: cuml_accel_bugs + condition: scikit-learn<1.8 + tests: + - "sklearn.linear_model.tests.test_coordinate_descent::test_enet_coordinate_descent[Lasso-1-kwargs1]" + - 
"sklearn.linear_model.tests.test_coordinate_descent::test_warm_start_convergence" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-False-ElasticNet]" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-True-ElasticNet]" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-False-ElasticNet]" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-True-ElasticNet]" + - "sklearn.manifold.tests.test_t_sne::test_pca_initialization_not_compatible_with_sparse_input[csr_array]" + - "sklearn.manifold.tests.test_t_sne::test_pca_initialization_not_compatible_with_sparse_input[csr_matrix]" - reason: Test should fail with cuml.accel marker: cuml_accel_bugs tests: @@ -200,7 +241,6 @@ - "sklearn.linear_model.tests.test_common::test_balance_property[42-True-LogisticRegression]" - "sklearn.linear_model.tests.test_coordinate_descent::test_check_input_false" - "sklearn.linear_model.tests.test_coordinate_descent::test_elasticnet_precompute_gram_weighted_samples" - - "sklearn.linear_model.tests.test_coordinate_descent::test_enet_coordinate_descent[Lasso-1-kwargs1]" - "sklearn.linear_model.tests.test_coordinate_descent::test_enet_copy_X_False_check_input_False" - "sklearn.linear_model.tests.test_coordinate_descent::test_enet_float_precision" - "sklearn.linear_model.tests.test_coordinate_descent::test_enet_multitarget" @@ -215,7 +255,6 @@ - "sklearn.linear_model.tests.test_coordinate_descent::test_lasso_readonly_data" - "sklearn.linear_model.tests.test_coordinate_descent::test_lasso_toy" - "sklearn.linear_model.tests.test_coordinate_descent::test_lasso_zero" - - "sklearn.linear_model.tests.test_coordinate_descent::test_warm_start_convergence" - "sklearn.linear_model.tests.test_coordinate_descent::test_warm_start_convergence_with_regularizer_decrement" 
- "sklearn.linear_model.tests.test_ransac::test_perfect_horizontal_line" - "sklearn.linear_model.tests.test_ransac::test_ransac_exceed_max_skips" @@ -240,17 +279,13 @@ - "sklearn.linear_model.tests.test_sag::test_step_size_alpha_error" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_same_multiple_output_sparse_dense[coo_array]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_same_multiple_output_sparse_dense[coo_matrix]" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-False-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-False-Lasso]" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-True-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-True-Lasso]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-True-6-24-False-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-True-6-24-False-Lasso]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-True-6-24-True-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-True-6-24-True-Lasso]" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-False-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-False-Lasso]" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-True-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-True-Lasso]" - 
"sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-True-6-24-False-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-True-6-24-False-Lasso]" @@ -262,8 +297,6 @@ - "sklearn.manifold.tests.test_t_sne::test_bh_match_exact" - "sklearn.manifold.tests.test_t_sne::test_binary_perplexity_stability" - "sklearn.manifold.tests.test_t_sne::test_n_iter_without_progress" - - "sklearn.manifold.tests.test_t_sne::test_pca_initialization_not_compatible_with_sparse_input[csr_array]" - - "sklearn.manifold.tests.test_t_sne::test_pca_initialization_not_compatible_with_sparse_input[csr_matrix]" - "sklearn.manifold.tests.test_t_sne::test_preserve_trustworthiness_approximately[pca-barnes_hut]" - "sklearn.manifold.tests.test_t_sne::test_preserve_trustworthiness_approximately[pca-exact]" - "sklearn.manifold.tests.test_t_sne::test_preserve_trustworthiness_approximately[random-barnes_hut]" @@ -451,11 +484,6 @@ - "sklearn.neighbors.tests.test_neighbors::test_neighbors_metrics[float64-minkowski]" - "sklearn.preprocessing.tests.test_data::test_standard_scaler_partial_fit_numerical_stability[csc_array]" - "sklearn.preprocessing.tests.test_data::test_standard_scaler_partial_fit_numerical_stability[csr_array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_matrix]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_matrix]" - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[False-isotonic]" - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[False-sigmoid]" - 
"sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[True-isotonic]" @@ -781,11 +809,6 @@ - "sklearn.model_selection.tests.test_classification_threshold::test_fit_and_score_over_thresholds_sample_weight" - "sklearn.model_selection.tests.test_classification_threshold::test_tuned_threshold_classifier_cv_zeros_sample_weights_equivalence" - "sklearn.model_selection.tests.test_validation::test_cross_val_predict_class_subset" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_matrix]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_matrix]" - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[False-isotonic]" - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[False-sigmoid]" - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[True-isotonic]" @@ -834,31 +857,21 @@ - "sklearn.tests.test_common::test_estimators[LogisticRegression()-check_sample_weight_equivalence_on_sparse_data]" - "sklearn.tests.test_common::test_estimators[Ridge()-check_non_transformer_estimators_n_iter]" - "sklearn.utils.tests.test_estimator_checks::test_check_estimator_pairwise" -- reason: Test should fail with cuml.accel (scikit-learn 1.7) +- reason: Test should fail with cuml.accel (scikit-learn <1.8) marker: cuml_accel_bugs - condition: scikit-learn == 1.7.* + condition: scikit-learn<1.8 tests: - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-False-None]" - - 
"sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-False-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-True-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-True-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-False-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-False-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-True-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-True-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-False-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-False-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-True-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-True-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-False-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-False-curve_kwargs1]" - - 
"sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-True-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-True-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[None-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[None-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[single-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[single-curve_kwargs1]" - - "sklearn.model_selection.tests.test_search::test_search_cv_sample_weight_equivalence[estimator0]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-array]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_array]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_matrix]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_array]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_matrix]" +- reason: Test should fail with cuml.accel (scikit-learn<1.6) + marker: cuml_accel_bugs + condition: scikit-learn<1.6 + tests: + - "sklearn.tests.test_common::test_check_n_features_in_after_fitting[LogisticRegression()]" + - "sklearn.tests.test_common::test_pandas_column_name_consistency[HalvingGridSearchCV(cv=2,estimator=LogisticRegression(),min_resources='smallest',param_grid={'C':[0.1,1.0]},random_state=0)0]" - reason: Test should fail with cuml.accel (scikit-learn<1.7) marker: cuml_accel_bugs condition: scikit-learn<1.5 @@ -869,12 +882,6 @@ - "sklearn.decomposition.tests.test_pca::test_sparse_pca_solver_error[42-csr_array-auto]" - 
"sklearn.decomposition.tests.test_pca::test_sparse_pca_solver_error[42-csr_matrix-auto]" - "sklearn.manifold.tests.test_t_sne::test_n_iter_used" -- reason: Test should fail with cuml.accel (scikit-learn<1.7) - marker: cuml_accel_bugs - condition: scikit-learn<1.7 - tests: - - "sklearn.tests.test_common::test_check_n_features_in_after_fitting[LogisticRegression()]" - - "sklearn.tests.test_common::test_pandas_column_name_consistency[HalvingGridSearchCV(cv=2,estimator=LogisticRegression(),min_resources='smallest',param_grid={'C':[0.1,1.0]},random_state=0)0]" - reason: Test should fail with cuml.accel (scikit-learn<1.7) marker: cuml_accel_bugs condition: scikit-learn<1.7,>=1.5.0 @@ -906,6 +913,11 @@ strict: false tests: - "sklearn.tests.test_common::test_check_n_features_in_after_fitting[SpectralEmbedding()]" +- reason: cuml raises a different error if X doesn't have expected n features + marker: cuml_accel_check_n_features_in + condition: scikit-learn==1.7.* + tests: + - "sklearn.tests.test_common::test_estimators[SVC()-check_n_features_in_after_fitting]" - reason: cuml raises a different error if X doesn't have expected n features marker: cuml_accel_check_n_features_in condition: scikit-learn>=1.6 @@ -923,7 +935,6 @@ - "sklearn.tests.test_common::test_estimators[RandomForestClassifier()-check_n_features_in_after_fitting]" - "sklearn.tests.test_common::test_estimators[RandomForestRegressor()-check_n_features_in_after_fitting]" - "sklearn.tests.test_common::test_estimators[Ridge()-check_n_features_in_after_fitting]" - - "sklearn.tests.test_common::test_estimators[SVC()-check_n_features_in_after_fitting]" - "sklearn.tests.test_common::test_estimators[SVR()-check_n_features_in_after_fitting]" - "sklearn.tests.test_common::test_estimators[TruncatedSVD()-check_n_features_in_after_fitting]" - reason: cuml doesn't set `feature_names_in_` properly @@ -955,6 +966,15 @@ tests: - "sklearn.mixture.tests.test_gaussian_mixture::test_gaussian_mixture_precisions_init_diag" - 
"sklearn.utils.tests.test_estimator_html_repr::test_show_arrow_pipeline" +- reason: Test is flaky with cuml.accel + marker: cuml_accel_flaky + condition: scikit-learn<1.8 + strict: false + tests: + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-barnes_hut]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-exact]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-barnes_hut]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-exact]" - reason: Test is flaky with cuml.accel marker: cuml_accel_flaky condition: scikit-learn>=1.6 @@ -983,10 +1003,6 @@ - "sklearn.feature_selection.tests.test_sequential::test_unsupervised_model_fit[2]" - "sklearn.feature_selection.tests.test_sequential::test_unsupervised_model_fit[3]" - "sklearn.manifold.tests.test_spectral_embedding::test_pipeline_spectral_clustering" - - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-barnes_hut]" - - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-exact]" - - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-barnes_hut]" - - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-exact]" - "sklearn.manifold.tests.test_t_sne::test_optimization_minimizes_kl_divergence" - "sklearn.manifold.tests.test_t_sne::test_uniform_grid[barnes_hut]" - "sklearn.manifold.tests.test_t_sne::test_uniform_grid[exact]" @@ -1159,6 +1175,11 @@ - "sklearn.linear_model._glm.tests.test_glm::test_linalg_warning_with_newton_solver[42]" - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_path_convergence_fail" - "sklearn.linear_model.tests.test_logistic::test_newton_cholesky_fallback_to_lbfgs[42]" +- reason: SVC input handling and validation + marker: cuml_accel_svc_estimator_checks + condition: scikit-learn<1.8 + tests: + - "sklearn.svm.tests.test_svm::test_svc_nonfinite_params" - reason: 
SVC input handling and validation marker: cuml_accel_svc_estimator_checks condition: scikit-learn>=1.6 @@ -1168,7 +1189,6 @@ - reason: SVC input handling and validation marker: cuml_accel_svc_estimator_checks tests: - - "sklearn.svm.tests.test_svm::test_svc_nonfinite_params" - "sklearn.tests.test_common::test_estimators[SVC()-check_classifier_data_not_an_array]" - "sklearn.tests.test_common::test_estimators[SVC()-check_complex_data]" - "sklearn.tests.test_common::test_estimators[SVC()-check_estimators_nan_inf]" @@ -1349,10 +1369,47 @@ - "sklearn.tests.test_common::test_estimators[TSNE()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[TSNE()-check_methods_sample_order_invariance]" - "sklearn.tests.test_common::test_estimators[TSNE()-check_methods_subset_invariance]" +- reason: Calibration temperature scaling differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[False-temperature]" + - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[True-temperature]" +- reason: Elasticnet scores attribute layout differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_scores_attribute_layout_elasticnet" - reason: Flaky deviations in n_iter_ values in cuml.accel strict: false tests: - "sklearn.cluster.tests.test_k_means::test_kmeans_elkan_results[42-1e-100-dense-blobs]" +- reason: Linear SVM sample weight handling differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVC-params0]" + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVC-params1]" + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVC-params2]" + - 
"sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVC-params3]" + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVR-params4]" + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVR-params5]" + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVR-params6]" +- reason: LinearSVC parameter validation differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[42-False-l1-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[42-False-l2-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[42-True-l1-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[42-True-l1-squared_hinge]" +- reason: LinearSVM + condition: scikit-learn<1.8 + tests: + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-squared_hinge-array]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-squared_hinge-array]" + - "sklearn.svm.tests.test_svm::test_liblinear_set_coef" + - "sklearn.svm.tests.test_svm::test_linear_svm_convergence_warnings" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[False-l1-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[False-l2-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[True-l1-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[True-l1-squared_hinge]" - reason: LinearSVM condition: scikit-learn<=1.6 tests: @@ -1377,20 +1434,12 @@ - "sklearn.feature_selection.tests.test_rfe::test_rfe_wrapped_estimator[RFECV-4-importance_getter0]" - "sklearn.feature_selection.tests.test_rfe::test_rfe_wrapped_estimator[RFECV-4-regressor_.coef_]" - "sklearn.model_selection.tests.test_search::test_grid_search_no_score" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-squared_hinge-array]" - - 
"sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-squared_hinge-array]" - "sklearn.svm.tests.test_sparse::test_linearsvc[lil_array-dok_array]" - "sklearn.svm.tests.test_sparse::test_linearsvc[lil_matrix-dok_matrix]" - "sklearn.svm.tests.test_sparse::test_linearsvc_iris[csr_array]" - "sklearn.svm.tests.test_sparse::test_linearsvc_iris[csr_matrix]" - "sklearn.svm.tests.test_sparse::test_sparse_liblinear_intercept_handling" - "sklearn.svm.tests.test_svm::test_dense_liblinear_intercept_handling" - - "sklearn.svm.tests.test_svm::test_liblinear_set_coef" - - "sklearn.svm.tests.test_svm::test_linear_svm_convergence_warnings" - - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[False-l1-hinge]" - - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[False-l2-hinge]" - - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[True-l1-hinge]" - - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[True-l1-squared_hinge]" - "sklearn.tests.test_calibration::test_calibration_default_estimator" - "sklearn.tests.test_calibration::test_calibration_inconsistent_prefit_n_features_in" - "sklearn.tests.test_calibration::test_calibration_multiclass[1-False-isotonic]" @@ -1420,10 +1469,12 @@ - "sklearn.tests.test_common::test_pandas_column_name_consistency[LinearSVC()]" - "sklearn.tests.test_common::test_pandas_column_name_consistency[LinearSVR()]" - reason: LinearSVM test expects exact results on small data, which isn't guaranteed + condition: scikit-learn<1.8 tests: - "sklearn.svm.tests.test_svm::test_bad_input[lil_array]" - "sklearn.svm.tests.test_svm::test_bad_input[lil_matrix]" - reason: LinearSVM tests too strict of tolerances due to differences in solvers + condition: scikit-learn<1.8 tests: - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[LinearSVC-params0]" - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[LinearSVC-params1]" @@ -1432,6 +1483,58 @@ - 
"sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[LinearSVR-params4]" - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[LinearSVR-params5]" - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[LinearSVR-params6]" +- reason: LogisticRegression liblinear sample weight handling differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_logisticregression_liblinear_sample_weight[42-params0]" + - "sklearn.linear_model.tests.test_logistic::test_logisticregression_liblinear_sample_weight[42-params1]" + - "sklearn.linear_model.tests.test_logistic::test_logisticregression_liblinear_sample_weight[42-params2]" +- reason: LogisticRegression missing _predict_proba_lr attribute with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_logreg_predict_proba_multinomial[42]" +- reason: LogisticRegression multiclass solvers differ with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_solvers_multiclass[True]" +- reason: LogisticRegression sample weights handling differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-lbfgs-single]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-liblinear-single]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-newton-cg-single]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-newton-cholesky-single]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-sag-single]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-saga-single]" +- reason: LogisticRegressionCV 
fold coefficients differ with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_LogisticRegressionCV_on_folds" +- reason: Multinomial logistic regression class weight handling differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_multinomial_logistic_regression_string_inputs" +- reason: 'Numerical precision difference: cuML uses float32, test expects float64 precision' + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_logistic_cv[42-False]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_cv[42-True]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_cv_multinomial_score[42-neg_log_loss-multiclass_agg_list3]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_glmnet[lbfgs]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_glmnet[newton-cholesky]" +- reason: Numerical tolerance issue with Lasso sparse/dense equality in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-24-6-False-Lasso]" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-24-6-True-Lasso]" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-24-6-False-Lasso]" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-24-6-True-Lasso]" +- reason: Numerical tolerance issue with t-SNE sparse input in sklearn 1.8 (flaky) + condition: scikit-learn>=1.8 + strict: false + tests: + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-random-exact]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-random-exact]" - reason: Ridge doesn't implement n_iter yet 
tests: - "sklearn.linear_model.tests.test_ridge::test_n_iter" @@ -1473,3 +1576,122 @@ - reason: The sklearn test has the error message accidentally flipped, our message is correct tests: - "sklearn.linear_model.tests.test_ridge::test_ridge_individual_penalties" +- reason: cuML TSNE barnes_hut produces poor quality embeddings with sparse input + condition: scikit-learn>=1.8 + tests: + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-random-barnes_hut]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-random-barnes_hut]" +- reason: cuML TSNE does not support sparse input with PCA initialization + condition: scikit-learn>=1.8 + tests: + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-pca-barnes_hut]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-pca-exact]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-pca-barnes_hut]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-pca-exact]" +- reason: cuML does not emit ConvergenceWarning in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-lbfgs-lbfgs failed to converge-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-lbfgs-lbfgs failed to converge-max_iter1]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-lbfgs-lbfgs failed to converge-max_iter2]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-lbfgs-lbfgs failed to converge-max_iter3]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-liblinear-Liblinear failed to converge, increase the number of iterations.-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-liblinear-Liblinear failed to converge, increase the number of iterations.-max_iter1]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-liblinear-Liblinear 
failed to converge, increase the number of iterations.-max_iter2]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-liblinear-Liblinear failed to converge, increase the number of iterations.-max_iter3]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-newton-cg-newton-cg failed to converge.* Increase the number of iterations.-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-newton-cg-newton-cg failed to converge.* Increase the number of iterations.-max_iter1]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-newton-cg-newton-cg failed to converge.* Increase the number of iterations.-max_iter2]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-newton-cg-newton-cg failed to converge.* Increase the number of iterations.-max_iter3]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-newton-cholesky-Newton solver did not converge after [0-9]* iterations-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-sag-The max_iter was reached which means the coef_ did not converge-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-sag-The max_iter was reached which means the coef_ did not converge-max_iter1]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-sag-The max_iter was reached which means the coef_ did not converge-max_iter2]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-sag-The max_iter was reached which means the coef_ did not converge-max_iter3]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-saga-The max_iter was reached which means the coef_ did not converge-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-saga-The max_iter was reached which means the coef_ did not converge-max_iter1]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-saga-The max_iter was reached which means the coef_ did not converge-max_iter2]" + - 
"sklearn.linear_model.tests.test_logistic::test_max_iter[42-saga-The max_iter was reached which means the coef_ did not converge-max_iter3]" + - "sklearn.svm.tests.test_svm::test_linear_svm_convergence_warnings[42]" +- reason: cuML doesn't enforce sparse matrix int64 indices restriction + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_array-liblinear]" + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_array-sag]" + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_array-saga]" + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_matrix-liblinear]" + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_matrix-sag]" + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_matrix-saga]" +- reason: cuML doesn't support warm_start with newton solvers + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-1-False-newton-cg]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-1-False-newton-cholesky]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-1-True-newton-cg]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-1-True-newton-cholesky]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-inf-False-newton-cg]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-inf-False-newton-cholesky]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-inf-True-newton-cg]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-inf-True-newton-cholesky]" +- reason: cuML doesn't validate NaN input in the same way as sklearn + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_nan" +- reason: 
cuML proxy doesn't replicate sklearn warnings in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_l1_ratio_None_deprecated" + - "sklearn.linear_model.tests.test_logistic::test_l1_ratio_non_elasticnet" + - "sklearn.linear_model.tests.test_logistic::test_logisticregression_warns_with_n_jobs" + - "sklearn.linear_model.tests.test_logistic::test_lr_penalty_l1ratio_incompatible[l1-0.0]" + - "sklearn.linear_model.tests.test_logistic::test_lr_penalty_l1ratio_incompatible[l2-1.0]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_deprecated[LogisticRegression]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_none[42-lbfgs]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_none[42-newton-cg]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_none[42-newton-cholesky]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_none[42-sag]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_none[42-saga]" +- reason: cuML proxy doesn't support direct writes to coef_/intercept_ attributes + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_write_parameters" +- reason: cuML uses different solver backend, doesn't enforce liblinear restrictions + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_check_solver_option[LogisticRegression]" + - "sklearn.linear_model.tests.test_logistic::test_liblinear_multiclass_raises[LogisticRegression]" +- reason: cuML uses float32 for liblinear solver, sklearn expects float64 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_dtype_match[csr_array-False-liblinear]" + - "sklearn.linear_model.tests.test_logistic::test_dtype_match[csr_array-True-liblinear]" + - "sklearn.linear_model.tests.test_logistic::test_dtype_match[csr_matrix-False-liblinear]" + - 
"sklearn.linear_model.tests.test_logistic::test_dtype_match[csr_matrix-True-liblinear]" +- reason: cuPy sparse matrices don't support int64 dtype in decision_function output + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_predict_2_classes[csr_array]" + - "sklearn.linear_model.tests.test_logistic::test_predict_2_classes[csr_matrix]" + - "sklearn.linear_model.tests.test_logistic::test_predict_3_classes[csr_array]" + - "sklearn.linear_model.tests.test_logistic::test_predict_3_classes[csr_matrix]" +- reason: cuml.accel handles sparse input differently in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.svm.tests.test_svm::test_bad_input[42-lil_array]" + - "sklearn.svm.tests.test_svm::test_bad_input[42-lil_matrix]" +- reason: cuml.accel raises RuntimeError instead of ValueError for non-finite params in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.svm.tests.test_svm::test_svc_nonfinite_params[42]" +- reason: l1_min_c calculation differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-log-csr_array]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-log-csr_matrix]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-squared_hinge-array]" +- reason: liblinear solver behavior differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_liblinear_dual_random_state[42]" + - "sklearn.linear_model.tests.test_logistic::test_liblinear_with_large_values" + - "sklearn.svm.tests.test_svm::test_liblinear_set_coef[42]" From 7114cf9982808f2da10bf84fbbfe6218f743a166 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 14:05:31 -0600 Subject: [PATCH 11/16] Improve output formatting in the summarize-results.py script. 
--- .../upstream/scikit-learn/test_config.yaml | 3 + .../upstream/summarize-results.py | 58 ++++++++++++------- 2 files changed, 40 insertions(+), 21 deletions(-) diff --git a/python/cuml/cuml_accel_tests/upstream/scikit-learn/test_config.yaml b/python/cuml/cuml_accel_tests/upstream/scikit-learn/test_config.yaml index e8948b3925..ae5ec75c51 100644 --- a/python/cuml/cuml_accel_tests/upstream/scikit-learn/test_config.yaml +++ b/python/cuml/cuml_accel_tests/upstream/scikit-learn/test_config.yaml @@ -1,3 +1,6 @@ # Configuration for scikit-learn tests threshold: fail_below: 85 # Minimum pass rate threshold [0-100] + +# Prefix to add to test IDs (e.g., "sklearn." for scikit-learn tests) +test_id_prefix: "sklearn." diff --git a/python/cuml/cuml_accel_tests/upstream/summarize-results.py b/python/cuml/cuml_accel_tests/upstream/summarize-results.py index 076616c4a4..77db284537 100755 --- a/python/cuml/cuml_accel_tests/upstream/summarize-results.py +++ b/python/cuml/cuml_accel_tests/upstream/summarize-results.py @@ -99,20 +99,29 @@ def parse_args(): type=int, help="Limit output to first N entries (default: no limit)", ) + parser.add_argument( + "--test-id-prefix", + type=str, + help="Prefix to add to test IDs (e.g., 'sklearn.')", + ) args = parser.parse_args() + # Load config if provided + config = load_config(args.config) if args.config is not None else {} + # Handle fail-below threshold logic: # 1. Use command line value if provided # 2. Use config value if no command line value # 3. Use default of 0.0 if neither is provided if args.fail_below is None: - if args.config is not None: - config = load_config(args.config) - args.fail_below = config.get("threshold", {}).get( - "fail_below", 0.0 - ) - else: - args.fail_below = 0.0 + args.fail_below = config.get("threshold", {}).get("fail_below", 0.0) + + # Handle test-id-prefix logic: + # 1. Use command line value if provided + # 2. Use config value if no command line value + # 3. 
Use empty string if neither is provided + if args.test_id_prefix is None: + args.test_id_prefix = config.get("test_id_prefix", "") return args @@ -241,14 +250,14 @@ def update_xfail_list(existing_list, test_results, xpassed_action="keep"): return final_groups -def get_test_results(testsuite): +def get_test_results(testsuite, prefix: str = ""): """Extract test results from testsuite. Returns dict mapping test IDs to their results. """ results = {} for testcase in testsuite.findall(".//testcase"): - test_id = QuoteTestID(get_test_id(testcase)) + test_id = QuoteTestID(get_test_id(testcase, prefix)) failure = testcase.find("failure") error = testcase.find("error") @@ -317,18 +326,23 @@ def format_table(rows, col_sep=" "): return formatted_rows -def get_test_id(testcase) -> str: +def get_test_id(testcase, prefix: str = "") -> str: classname = testcase.get("classname", "") name = testcase.get("name") - return f"{classname}::{name}" if classname else name + base_id = f"{classname}::{name}" if classname else name + # Add prefix if provided and not already present + if prefix and not base_id.startswith(prefix): + return f"{prefix}{base_id}" + return base_id -def format_traceback_output(testsuite, limit=None): +def format_traceback_output(testsuite, limit=None, prefix: str = ""): """Format test results showing tracebacks of failed tests. 
Args: testsuite: XML testsuite element containing test results limit: Optional limit on number of entries to show + prefix: Prefix to add to test IDs Returns: List of formatted strings containing test results and tracebacks @@ -347,7 +361,7 @@ def format_traceback_output(testsuite, limit=None): error = testcase.find("error") if failure is not None or error is not None: - test_id = get_test_id(testcase) + test_id = get_test_id(testcase, prefix) msg = "" details = "" @@ -364,7 +378,7 @@ def format_traceback_output(testsuite, limit=None): elif msg == "xfail": continue # Skip xfailed tests - output.append(f"\nTest: {test_id}") + output.append(f'\nTest: "{test_id}"') output.append("-" * 80) if msg: output.append(f"Error: {msg}") @@ -451,13 +465,15 @@ def main(): pass_rate = (passed / total_tests * 100) if total_tests > 0 else 0 if args.format == "traceback": - output = format_traceback_output(testsuite, args.limit) + output = format_traceback_output( + testsuite, args.limit, args.test_id_prefix + ) print("\n".join(output)) return if args.format == "xfail_list" or args.update_xfail_list: # Get test results - test_results = get_test_results(testsuite) + test_results = get_test_results(testsuite, args.test_id_prefix) if args.update_xfail_list: if not args.update_xfail_list.exists(): @@ -535,7 +551,7 @@ def main(): failure = testcase.find("failure") error = testcase.find("error") if failure is not None or error is not None: - test_id = get_test_id(testcase) + test_id = get_test_id(testcase, args.test_id_prefix) msg = "" if failure is not None and failure.get("message") is not None: msg = failure.get("message") @@ -544,9 +560,9 @@ def main(): if "XPASS" in msg: continue # Skip xpassed tests in failure list elif msg == "xfail": - print(f" {test_id} (xfail)") + print(f' "{test_id}" (xfail)') else: - print(f" {test_id}") + print(f' "{test_id}"') count += 1 # List strict xpasses in verbose mode @@ -566,8 +582,8 @@ def main(): elif error is not None and error.get("message") is 
not None: msg = error.get("message") if "XPASS(strict)" in msg: - test_id = get_test_id(testcase) - print(f" {test_id}") + test_id = get_test_id(testcase, args.test_id_prefix) + print(f' "{test_id}"') count += 1 # Check threshold From 9531bb73b25662ed8067035b1501ab0f01e4e750 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 14:17:42 -0600 Subject: [PATCH 12/16] Enable filtering in the summarize-results.py script. --- .../upstream/summarize-results.py | 44 +++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/python/cuml/cuml_accel_tests/upstream/summarize-results.py b/python/cuml/cuml_accel_tests/upstream/summarize-results.py index 77db284537..7cfa9a4e3e 100755 --- a/python/cuml/cuml_accel_tests/upstream/summarize-results.py +++ b/python/cuml/cuml_accel_tests/upstream/summarize-results.py @@ -104,6 +104,13 @@ def parse_args(): type=str, help="Prefix to add to test IDs (e.g., 'sklearn.')", ) + parser.add_argument( + "-k", + "--filter", + type=str, + dest="filter_pattern", + help="Filter tests by ID pattern (substring match, case-insensitive)", + ) args = parser.parse_args() # Load config if provided @@ -132,6 +139,21 @@ def validate_threshold(threshold): raise ValueError("Threshold must be between 0 and 100") +def matches_filter(test_id, pattern): + """Check if test ID matches the filter pattern (case-insensitive substring). 
+ + Args: + test_id: The test ID to check + pattern: The filter pattern (substring match, case-insensitive) + + Returns: + True if pattern is None or test_id contains pattern + """ + if pattern is None: + return True + return pattern.lower() in test_id.lower() + + def load_existing_xfail_list(path): """Load existing xfail list from file.""" if not path.exists(): @@ -336,13 +358,16 @@ def get_test_id(testcase, prefix: str = "") -> str: return base_id -def format_traceback_output(testsuite, limit=None, prefix: str = ""): +def format_traceback_output( + testsuite, limit=None, prefix: str = "", filter_pattern=None +): """Format test results showing tracebacks of failed tests. Args: testsuite: XML testsuite element containing test results limit: Optional limit on number of entries to show prefix: Prefix to add to test IDs + filter_pattern: Optional pattern to filter test IDs Returns: List of formatted strings containing test results and tracebacks @@ -363,6 +388,10 @@ def format_traceback_output(testsuite, limit=None, prefix: str = ""): if failure is not None or error is not None: test_id = get_test_id(testcase, prefix) + # Apply filter + if not matches_filter(test_id, filter_pattern): + continue + msg = "" details = "" @@ -466,7 +495,7 @@ def main(): if args.format == "traceback": output = format_traceback_output( - testsuite, args.limit, args.test_id_prefix + testsuite, args.limit, args.test_id_prefix, args.filter_pattern ) print("\n".join(output)) return @@ -503,6 +532,9 @@ def main(): for test_id, result in test_results.items(): if args.limit is not None and count >= args.limit: break + # Apply filter + if not matches_filter(test_id, args.filter_pattern): + continue if result["status"] in ("fail", "xfail"): if not xfail_list: xfail_list.append( @@ -552,6 +584,9 @@ def main(): error = testcase.find("error") if failure is not None or error is not None: test_id = get_test_id(testcase, args.test_id_prefix) + # Apply filter + if not matches_filter(test_id, 
args.filter_pattern): + continue msg = "" if failure is not None and failure.get("message") is not None: msg = failure.get("message") @@ -576,13 +611,16 @@ def main(): failure = testcase.find("failure") error = testcase.find("error") if failure is not None or error is not None: + test_id = get_test_id(testcase, args.test_id_prefix) + # Apply filter + if not matches_filter(test_id, args.filter_pattern): + continue msg = "" if failure is not None and failure.get("message") is not None: msg = failure.get("message") elif error is not None and error.get("message") is not None: msg = error.get("message") if "XPASS(strict)" in msg: - test_id = get_test_id(testcase, args.test_id_prefix) print(f' "{test_id}"') count += 1 From 20a08cfae87afca6a3b23b1c247765d8012422a9 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Thu, 11 Dec 2025 13:45:28 -0600 Subject: [PATCH 13/16] revert changes to GHA config --- .github/workflows/test.yaml | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 83177fb695..f84f0fa456 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -95,19 +95,8 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: "ci/test_python_cuml_accel_upstream.sh" - # Select amd64 with oldest deps, plus an "intermediate" entry based on oldest - matrix_filter: '( - map(select(.ARCH == "amd64")) - | map(select(.DEPENDENCIES == "oldest")) - | sort_by(.PY_VER) - ) - + - ( - map(select(.ARCH == "amd64")) - | map(select(.DEPENDENCIES == "oldest")) - | sort_by(.PY_VER) - | map(.DEPENDENCIES = "intermediate") - )' + # Select amd64 and one job per major CUDA version with the latest CUDA and Python versions + matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([.CUDA_VER,.PY_VER]|map(split(".")|map(tonumber)))) sccache-dist-token-secret-name: GIST_REPO_READ_ORG_GITHUB_TOKEN wheel-tests-cuml: secrets: 
inherit From 2b910d522eae1e47cfefb4ee57e1e6fb79121ca5 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Thu, 11 Dec 2025 13:59:54 -0600 Subject: [PATCH 14/16] Skip umap-learn upstream tests for scikit-learn 1.8.*. --- .../cuml_accel_tests/upstream/umap/run-tests.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/python/cuml/cuml_accel_tests/upstream/umap/run-tests.sh b/python/cuml/cuml_accel_tests/upstream/umap/run-tests.sh index f2bc065042..3e0249b6bf 100755 --- a/python/cuml/cuml_accel_tests/upstream/umap/run-tests.sh +++ b/python/cuml/cuml_accel_tests/upstream/umap/run-tests.sh @@ -14,6 +14,21 @@ set -eu UMAP_TAG="release-0.5.7" +# Skip tests for scikit-learn >= 1.8 -- umap-learn is not compatible with scikit-learn 1.8 yet +python -c " +import sys +from packaging.version import Version +import sklearn +sys.exit( + int( + Version(sklearn.__version__) >= Version('1.8') + ) +) +" || { + echo "Skipping umap tests for scikit-learn >= 1.8" + exit 0 +} + THIS_DIRECTORY=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P ) UMAP_REPO="${THIS_DIRECTORY}/umap-upstream" From b321c80efa67669038d86650a442bc60a28a3781 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Fri, 12 Dec 2025 16:15:54 -0600 Subject: [PATCH 15/16] Revert "Revert later: Expand xfail-manager to support "set" function." This reverts commit 83e42877ef00195ac962bc1fdbdc09b1332b1a86. 
--- .../upstream/xfail_manager.py | 252 ------------------ 1 file changed, 252 deletions(-) diff --git a/python/cuml/cuml_accel_tests/upstream/xfail_manager.py b/python/cuml/cuml_accel_tests/upstream/xfail_manager.py index d98d85e9de..15e0bb000b 100755 --- a/python/cuml/cuml_accel_tests/upstream/xfail_manager.py +++ b/python/cuml/cuml_accel_tests/upstream/xfail_manager.py @@ -14,11 +14,9 @@ - Deterministic formatting and sorting of xfail lists - Validation of group conditions - Cleanup of empty groups -- Batch modification of test metadata CLI Commands: - format: Apply consistent formatting and sorting -- set: Modify metadata (reason, condition, marker, strict) for specified tests The tool ensures xfail lists remain maintainable and produce clean diffs in version control systems. @@ -173,135 +171,6 @@ def __init__(self, xfail_list_path: Optional[Union[str, Path]] = None): if xfail_list_path: self.load(xfail_list_path) - def find_test(self, test_id: str) -> Optional[XfailGroup]: - """Find the group containing a specific test. - - Args: - test_id: The test ID to search for - - Returns: - The XfailGroup containing the test, or None if not found - """ - for group in self.groups: - if test_id in group.tests: - return group - return None - - def remove_test(self, test_id: str) -> bool: - """Remove a test from its current group. - - Args: - test_id: The test ID to remove - - Returns: - True if the test was found and removed, False otherwise - """ - for group in self.groups: - if test_id in group.tests: - group.tests.remove(test_id) - return True - return False - - def set_test_metadata( - self, - test_ids: List[str], - reason: Optional[str] = None, - condition: Optional[str] = None, - marker: Optional[str] = None, - strict: Optional[bool] = None, - run: Optional[bool] = None, - ) -> Dict[str, Any]: - """Set metadata for specified tests, moving them to appropriate groups. - - For each test, this method: - 1. 
Finds the test's current group (if any) to get default metadata - 2. Overrides only the metadata options that were explicitly provided - 3. Removes the test from its original group - 4. Adds the test to a group with the new metadata - - Args: - test_ids: List of test IDs to modify - reason: New reason (if provided) - condition: New condition (if provided) - marker: New marker (if provided) - strict: New strict value (if provided) - run: New run value (if provided) - - Returns: - Dictionary with 'moved', 'added', and 'not_found' lists - """ - results = {"moved": [], "added": [], "not_found": []} - - for test_id in test_ids: - test_id = QuoteTestID(test_id) - - # Find current group for this test - current_group = self.find_test(test_id) - - if current_group: - # Get defaults from current group - new_reason = ( - reason if reason is not None else current_group.reason - ) - new_condition = ( - condition - if condition is not None - else current_group.condition - ) - new_marker = ( - marker if marker is not None else current_group.marker - ) - new_strict = ( - strict if strict is not None else current_group.strict - ) - new_run = run if run is not None else current_group.run - - # Remove from current group - self.remove_test(test_id) - results["moved"].append(test_id) - else: - # Test not found - use provided values or defaults - if reason is None: - results["not_found"].append(test_id) - continue - - new_reason = reason - new_condition = condition - new_marker = marker - new_strict = strict if strict is not None else True - new_run = run if run is not None else True - results["added"].append(test_id) - - # Find or create a group with matching metadata - target_group = None - for group in self.groups: - if ( - group.reason == new_reason - and group.condition == new_condition - and group.marker == new_marker - and group.strict == new_strict - and group.run == new_run - ): - target_group = group - break - - if target_group is None: - # Create new group - target_group = 
XfailGroup( - reason=new_reason, - tests=[], - strict=new_strict, - condition=new_condition, - run=new_run, - marker=new_marker, - ) - self.groups.append(target_group) - - # Add test to target group - target_group.tests.append(test_id) - - return results - def load(self, xfail_list_path: Union[str, Path]) -> None: """Load xfail list from YAML file.""" path = Path(xfail_list_path) @@ -489,86 +358,6 @@ def _format_single_file(xfail_path, args): return 1 -def cmd_set(args): - """Set metadata for specified tests in the xfail list.""" - xfail_path = Path(args.xfail_list) - - if not xfail_path.exists(): - print( - f"Error: Xfail list file not found: {xfail_path}", file=sys.stderr - ) - return 1 - - # Validate that at least one metadata option is provided - has_metadata = any( - [ - args.reason is not None, - args.condition is not None, - args.marker is not None, - args.strict is not None, - ] - ) - - if not has_metadata: - print( - "Error: At least one of --reason, --condition, --marker, " - "or --strict must be provided", - file=sys.stderr, - ) - return 1 - - try: - manager = XfailManager(xfail_path) - - results = manager.set_test_metadata( - test_ids=args.test_ids, - reason=args.reason, - condition=args.condition, - marker=args.marker, - strict=args.strict, - ) - - # Report results - if results["moved"]: - print(f"Moved {len(results['moved'])} test(s) to new group:") - for test_id in results["moved"]: - print(f" {test_id}") - - if results["added"]: - print(f"Added {len(results['added'])} new test(s):") - for test_id in results["added"]: - print(f" {test_id}") - - if results["not_found"]: - print( - f"Warning: {len(results['not_found'])} test(s) not found " - "(--reason required to add new tests):", - file=sys.stderr, - ) - for test_id in results["not_found"]: - print(f" {test_id}", file=sys.stderr) - - # Clean up empty groups - manager.cleanup_empty_groups() - - # Validate and save - validation_errors = manager.validate_conditions() - if validation_errors: - 
print("Validation errors:", file=sys.stderr) - for error in validation_errors: - print(f" {error}", file=sys.stderr) - return 1 - - manager.save(xfail_path) - print(f"Updated {xfail_path}") - - return 0 if not results["not_found"] else 1 - - except Exception as e: - print(f"Error: {e}", file=sys.stderr) - return 1 - - def main(): """Main CLI entry point.""" parser = argparse.ArgumentParser( @@ -594,47 +383,6 @@ def main(): ) format_parser.set_defaults(func=cmd_format) - # Set command - set_parser = subparsers.add_parser( - "set", - help="Set metadata for specified tests in the xfail list", - ) - set_parser.add_argument( - "xfail_list", - help="Xfail list file to modify", - ) - set_parser.add_argument( - "test_ids", - nargs="+", - metavar="TEST_ID", - help="Test IDs to modify", - ) - set_parser.add_argument( - "--reason", - help="Set the reason for the xfail group", - ) - set_parser.add_argument( - "--condition", - help="Set the condition for the xfail group (e.g., 'scikit-learn<1.8')", - ) - set_parser.add_argument( - "--marker", - help="Set the pytest marker for the xfail group", - ) - set_parser.add_argument( - "--strict", - action="store_true", - default=None, - help="Set strict=true for the xfail group", - ) - set_parser.add_argument( - "--no-strict", - action="store_false", - dest="strict", - help="Set strict=false for the xfail group", - ) - set_parser.set_defaults(func=cmd_set) - args = parser.parse_args() if not args.command: From 2a3316a63774fc0a48152fe10a39085d9a03aaf8 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Fri, 12 Dec 2025 16:16:16 -0600 Subject: [PATCH 16/16] Revert changes to python/cuml/cuml_accel_tests/upstream/summarize-results.py --- .../upstream/scikit-learn/test_config.yaml | 3 - .../upstream/summarize-results.py | 96 ++++--------------- 2 files changed, 21 insertions(+), 78 deletions(-) diff --git a/python/cuml/cuml_accel_tests/upstream/scikit-learn/test_config.yaml b/python/cuml/cuml_accel_tests/upstream/scikit-learn/test_config.yaml 
index ae5ec75c51..e8948b3925 100644 --- a/python/cuml/cuml_accel_tests/upstream/scikit-learn/test_config.yaml +++ b/python/cuml/cuml_accel_tests/upstream/scikit-learn/test_config.yaml @@ -1,6 +1,3 @@ # Configuration for scikit-learn tests threshold: fail_below: 85 # Minimum pass rate threshold [0-100] - -# Prefix to add to test IDs (e.g., "sklearn." for scikit-learn tests) -test_id_prefix: "sklearn." diff --git a/python/cuml/cuml_accel_tests/upstream/summarize-results.py b/python/cuml/cuml_accel_tests/upstream/summarize-results.py index 7cfa9a4e3e..076616c4a4 100755 --- a/python/cuml/cuml_accel_tests/upstream/summarize-results.py +++ b/python/cuml/cuml_accel_tests/upstream/summarize-results.py @@ -99,36 +99,20 @@ def parse_args(): type=int, help="Limit output to first N entries (default: no limit)", ) - parser.add_argument( - "--test-id-prefix", - type=str, - help="Prefix to add to test IDs (e.g., 'sklearn.')", - ) - parser.add_argument( - "-k", - "--filter", - type=str, - dest="filter_pattern", - help="Filter tests by ID pattern (substring match, case-insensitive)", - ) args = parser.parse_args() - # Load config if provided - config = load_config(args.config) if args.config is not None else {} - # Handle fail-below threshold logic: # 1. Use command line value if provided # 2. Use config value if no command line value # 3. Use default of 0.0 if neither is provided if args.fail_below is None: - args.fail_below = config.get("threshold", {}).get("fail_below", 0.0) - - # Handle test-id-prefix logic: - # 1. Use command line value if provided - # 2. Use config value if no command line value - # 3. 
Use empty string if neither is provided - if args.test_id_prefix is None: - args.test_id_prefix = config.get("test_id_prefix", "") + if args.config is not None: + config = load_config(args.config) + args.fail_below = config.get("threshold", {}).get( + "fail_below", 0.0 + ) + else: + args.fail_below = 0.0 return args @@ -139,21 +123,6 @@ def validate_threshold(threshold): raise ValueError("Threshold must be between 0 and 100") -def matches_filter(test_id, pattern): - """Check if test ID matches the filter pattern (case-insensitive substring). - - Args: - test_id: The test ID to check - pattern: The filter pattern (substring match, case-insensitive) - - Returns: - True if pattern is None or test_id contains pattern - """ - if pattern is None: - return True - return pattern.lower() in test_id.lower() - - def load_existing_xfail_list(path): """Load existing xfail list from file.""" if not path.exists(): @@ -272,14 +241,14 @@ def update_xfail_list(existing_list, test_results, xpassed_action="keep"): return final_groups -def get_test_results(testsuite, prefix: str = ""): +def get_test_results(testsuite): """Extract test results from testsuite. Returns dict mapping test IDs to their results. 
""" results = {} for testcase in testsuite.findall(".//testcase"): - test_id = QuoteTestID(get_test_id(testcase, prefix)) + test_id = QuoteTestID(get_test_id(testcase)) failure = testcase.find("failure") error = testcase.find("error") @@ -348,26 +317,18 @@ def format_table(rows, col_sep=" "): return formatted_rows -def get_test_id(testcase, prefix: str = "") -> str: +def get_test_id(testcase) -> str: classname = testcase.get("classname", "") name = testcase.get("name") - base_id = f"{classname}::{name}" if classname else name - # Add prefix if provided and not already present - if prefix and not base_id.startswith(prefix): - return f"{prefix}{base_id}" - return base_id + return f"{classname}::{name}" if classname else name -def format_traceback_output( - testsuite, limit=None, prefix: str = "", filter_pattern=None -): +def format_traceback_output(testsuite, limit=None): """Format test results showing tracebacks of failed tests. Args: testsuite: XML testsuite element containing test results limit: Optional limit on number of entries to show - prefix: Prefix to add to test IDs - filter_pattern: Optional pattern to filter test IDs Returns: List of formatted strings containing test results and tracebacks @@ -386,11 +347,7 @@ def format_traceback_output( error = testcase.find("error") if failure is not None or error is not None: - test_id = get_test_id(testcase, prefix) - - # Apply filter - if not matches_filter(test_id, filter_pattern): - continue + test_id = get_test_id(testcase) msg = "" details = "" @@ -407,7 +364,7 @@ def format_traceback_output( elif msg == "xfail": continue # Skip xfailed tests - output.append(f'\nTest: "{test_id}"') + output.append(f"\nTest: {test_id}") output.append("-" * 80) if msg: output.append(f"Error: {msg}") @@ -494,15 +451,13 @@ def main(): pass_rate = (passed / total_tests * 100) if total_tests > 0 else 0 if args.format == "traceback": - output = format_traceback_output( - testsuite, args.limit, args.test_id_prefix, args.filter_pattern 
- ) + output = format_traceback_output(testsuite, args.limit) print("\n".join(output)) return if args.format == "xfail_list" or args.update_xfail_list: # Get test results - test_results = get_test_results(testsuite, args.test_id_prefix) + test_results = get_test_results(testsuite) if args.update_xfail_list: if not args.update_xfail_list.exists(): @@ -532,9 +487,6 @@ def main(): for test_id, result in test_results.items(): if args.limit is not None and count >= args.limit: break - # Apply filter - if not matches_filter(test_id, args.filter_pattern): - continue if result["status"] in ("fail", "xfail"): if not xfail_list: xfail_list.append( @@ -583,10 +535,7 @@ def main(): failure = testcase.find("failure") error = testcase.find("error") if failure is not None or error is not None: - test_id = get_test_id(testcase, args.test_id_prefix) - # Apply filter - if not matches_filter(test_id, args.filter_pattern): - continue + test_id = get_test_id(testcase) msg = "" if failure is not None and failure.get("message") is not None: msg = failure.get("message") @@ -595,9 +544,9 @@ def main(): if "XPASS" in msg: continue # Skip xpassed tests in failure list elif msg == "xfail": - print(f' "{test_id}" (xfail)') + print(f" {test_id} (xfail)") else: - print(f' "{test_id}"') + print(f" {test_id}") count += 1 # List strict xpasses in verbose mode @@ -611,17 +560,14 @@ def main(): failure = testcase.find("failure") error = testcase.find("error") if failure is not None or error is not None: - test_id = get_test_id(testcase, args.test_id_prefix) - # Apply filter - if not matches_filter(test_id, args.filter_pattern): - continue msg = "" if failure is not None and failure.get("message") is not None: msg = failure.get("message") elif error is not None and error.get("message") is not None: msg = error.get("message") if "XPASS(strict)" in msg: - print(f' "{test_id}"') + test_id = get_test_id(testcase) + print(f" {test_id}") count += 1 # Check threshold