From 5e8f6c6a1f7fa51c263e9e3883847df0088261cc Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Thu, 11 Dec 2025 10:06:36 -0600 Subject: [PATCH 01/16] Revert "Pin scikit-learn<1.8 in test dependencies (#7588)" This reverts commit 0b41aeb18390fb6e66e0e22acf6820a9c311b2fa. --- conda/environments/all_cuda-129_arch-aarch64.yaml | 1 - conda/environments/all_cuda-129_arch-x86_64.yaml | 1 - conda/environments/all_cuda-130_arch-aarch64.yaml | 1 - conda/environments/all_cuda-130_arch-x86_64.yaml | 1 - dependencies.yaml | 2 +- python/cuml/pyproject.toml | 2 +- 6 files changed, 2 insertions(+), 6 deletions(-) diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 96adb871d2..140479f536 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -71,7 +71,6 @@ dependencies: - rmm==26.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn>=1.4 -- scikit-learn>=1.4,<1.8.0 - scipy>=1.11.0 - seaborn - sphinx diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 7839dc67b7..5f8e8903a6 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -71,7 +71,6 @@ dependencies: - rmm==26.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn>=1.4 -- scikit-learn>=1.4,<1.8.0 - scipy>=1.11.0 - seaborn - sphinx diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index 494da2e884..449b5ec714 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -71,7 +71,6 @@ dependencies: - rmm==26.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn>=1.4 -- scikit-learn>=1.4,<1.8.0 - scipy>=1.11.0 - seaborn - sphinx diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml 
b/conda/environments/all_cuda-130_arch-x86_64.yaml index 3e8908c335..6b2b6754d1 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -71,7 +71,6 @@ dependencies: - rmm==26.2.*,>=0.0.0a0 - scikit-build-core>=0.10.0 - scikit-learn>=1.4 -- scikit-learn>=1.4,<1.8.0 - scipy>=1.11.0 - seaborn - sphinx diff --git a/dependencies.yaml b/dependencies.yaml index f77919dd8d..dc71360a08 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -486,7 +486,7 @@ dependencies: - pytest-cov - pytest-xdist - seaborn - - scikit-learn>=1.4,<1.8.0 + - *scikit_learn - statsmodels - tenacity - umap-learn==0.5.7 diff --git a/python/cuml/pyproject.toml b/python/cuml/pyproject.toml index 69e930c553..77569d1b6a 100644 --- a/python/cuml/pyproject.toml +++ b/python/cuml/pyproject.toml @@ -120,7 +120,7 @@ test = [ "pytest-xdist", "pytest<9.0", "pyyaml", - "scikit-learn>=1.4,<1.8.0", + "scikit-learn>=1.4", "seaborn", "statsmodels", "tenacity", From 2a35495ed069779db7a118bab4b6bfb30764f6b7 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 09:13:05 -0600 Subject: [PATCH 02/16] Skip HDBSCAN tests with scikit-learn 1.8. --- python/cuml/cuml_accel_tests/integration/test_hdbscan.py | 3 +++ python/cuml/tests/test_hdbscan.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/python/cuml/cuml_accel_tests/integration/test_hdbscan.py b/python/cuml/cuml_accel_tests/integration/test_hdbscan.py index 68b80205a3..c7c01efd81 100644 --- a/python/cuml/cuml_accel_tests/integration/test_hdbscan.py +++ b/python/cuml/cuml_accel_tests/integration/test_hdbscan.py @@ -13,6 +13,9 @@ from sklearn.preprocessing import StandardScaler if Version(sklearn.__version__) >= Version("1.8.0.dev0"): + # NOTE: Remove this skip when issue + # https://github.com/scikit-learn-contrib/hdbscan/issues/689 is resolved, + # as it blocks compatibility with scikit-learn >= 1.8.0.dev0. 
pytest.skip( "hdbscan requires sklearn < 1.8.0.dev0", allow_module_level=True ) diff --git a/python/cuml/tests/test_hdbscan.py b/python/cuml/tests/test_hdbscan.py index c6d876c8ac..ac3bfb8821 100644 --- a/python/cuml/tests/test_hdbscan.py +++ b/python/cuml/tests/test_hdbscan.py @@ -6,6 +6,8 @@ import numpy as np import pandas as pd import pytest +import sklearn +from packaging.version import Version from pylibraft.common import DeviceResourcesSNMG from sklearn import datasets from sklearn.datasets import make_blobs @@ -23,6 +25,11 @@ from cuml.testing.datasets import make_pattern from cuml.testing.utils import array_equal +if Version(sklearn.__version__) >= Version("1.8.0.dev0"): + pytest.skip( + "hdbscan requires sklearn < 1.8.0.dev0", allow_module_level=True + ) + dataset_names = ["noisy_circles", "noisy_moons", "varied"] From e61e4a58f40086798fd5735f9f0be6729ff92e9c Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 09:18:56 -0600 Subject: [PATCH 03/16] Make test_logistic_regression_model_default test compatible with scikit-learn 1.8. --- python/cuml/tests/test_linear_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/tests/test_linear_model.py b/python/cuml/tests/test_linear_model.py index e4e243dedf..34a15d47d4 100644 --- a/python/cuml/tests/test_linear_model.py +++ b/python/cuml/tests/test_linear_model.py @@ -417,7 +417,7 @@ def test_logistic_regression_model_default(dtype): y_test = y_test.astype(dtype) culog = cuLog() culog.fit(X_train, y_train) - sklog = skLog(multi_class="auto") + sklog = skLog() sklog.fit(X_train, y_train) From 477c2e9e07ad69c79ae962fdd0a448876afaf34d Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 09:31:59 -0600 Subject: [PATCH 04/16] Make test_device_selection test compatible with sklearn 1.8. 
--- python/cuml/tests/test_fil.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/cuml/tests/test_fil.py b/python/cuml/tests/test_fil.py index 31997b52df..63c991702b 100644 --- a/python/cuml/tests/test_fil.py +++ b/python/cuml/tests/test_fil.py @@ -10,7 +10,9 @@ import numpy as np import pandas as pd import pytest +import sklearn import treelite +from packaging.version import Version # Import XGBoost before scikit-learn to work around a libgomp bug # See https://github.com/dmlc/xgboost/issues/7110 @@ -910,6 +912,9 @@ def test_device_selection(device_id, model_kind, tmp_path): ) xgb_model.fit(X, y) model_path = os.path.join(tmp_path, "xgb_class.ubj") + # Skip when sklearn >= 1.8.0.dev0 (xgboost is incompatible) + if Version(sklearn.__version__) >= Version("1.8.0.dev0"): + pytest.skip("xgboost is incompatible with sklearn >= 1.8.0.dev0") xgb_model.save_model(model_path) fm = ForestInference.load( model_path, From 9986457955b18694e389e64ff18c5ba07f99e358 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 09:37:15 -0600 Subject: [PATCH 05/16] Skip UMAP-learn tests with scikit-learn 1.8. The umap-learn package is incompatible.
--- python/cuml/tests/test_umap.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/cuml/tests/test_umap.py b/python/cuml/tests/test_umap.py index a66c48dc34..4e80387de6 100644 --- a/python/cuml/tests/test_umap.py +++ b/python/cuml/tests/test_umap.py @@ -9,7 +9,9 @@ import numpy as np import pytest import scipy.sparse as scipy_sparse +import sklearn import umap +from packaging.version import Version from pylibraft.common import DeviceResourcesSNMG from sklearn import datasets from sklearn.cluster import KMeans @@ -29,6 +31,9 @@ unit_param, ) +if Version(sklearn.__version__) >= Version("1.8.0.dev0"): + pytest.skip("umap requires sklearn < 1.8.0.dev0", allow_module_level=True) + dataset_names = ["iris", "digits", "wine", "blobs"] From a4c26fb0c87b5da7275ba6d1ac397373313bcd3e Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 09:41:39 -0600 Subject: [PATCH 06/16] Conditionally skip test_trustworthiness due to umap incompatibility. --- python/cuml/tests/test_trustworthiness.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/cuml/tests/test_trustworthiness.py b/python/cuml/tests/test_trustworthiness.py index e684f26442..668a9375df 100644 --- a/python/cuml/tests/test_trustworthiness.py +++ b/python/cuml/tests/test_trustworthiness.py @@ -4,6 +4,8 @@ import cudf import numpy as np import pytest +import sklearn +from packaging.version import Version from sklearn.datasets import make_blobs from sklearn.manifold import trustworthiness as sklearn_trustworthiness from umap import UMAP @@ -19,6 +21,10 @@ @pytest.mark.filterwarnings( "ignore:n_jobs value.*overridden.*by setting random_state.*:UserWarning" ) +@pytest.mark.xfail( + condition=Version(sklearn.__version__) >= Version("1.8.0.dev0"), + reason="umap-learn is incompatible with sklearn >= 1.8.0", +) def test_trustworthiness( input_type, n_samples, n_features, n_components, batch_size ): From 64bdb197cda3bf283c538b8788f375cf7d2ad514 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: 
Wed, 10 Dec 2025 09:59:08 -0600 Subject: [PATCH 07/16] Make test_logistic_regression compatible with scikit-learn 1.8. --- python/cuml/tests/test_linear_model.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/python/cuml/tests/test_linear_model.py b/python/cuml/tests/test_linear_model.py index 34a15d47d4..062d546675 100644 --- a/python/cuml/tests/test_linear_model.py +++ b/python/cuml/tests/test_linear_model.py @@ -329,7 +329,6 @@ def test_logistic_regression( solver="saga", C=C, fit_intercept=fit_intercept, - multi_class="auto", ) else: sklog = skLog( @@ -337,7 +336,6 @@ def test_logistic_regression( solver="saga", C=C, fit_intercept=fit_intercept, - multi_class="auto", ) else: sklog = skLog( @@ -345,7 +343,6 @@ def test_logistic_regression( solver="lbfgs", C=C, fit_intercept=fit_intercept, - multi_class="auto", ) sklog.fit(X_train, y_train) @@ -586,11 +583,7 @@ def test_logistic_regression_predict_proba( sklog = skLog( fit_intercept=fit_intercept, - **( - {"solver": "lbfgs", "multi_class": "multinomial"} - if num_classes > 2 - else {} - ), + **({"solver": "lbfgs"} if num_classes > 2 else {}), ) sklog.coef_ = culog.coef_ sklog.intercept_ = culog.intercept_ if fit_intercept else 0 From 3b954a1f142ec057c10e925911ff609917e95196 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 10:46:42 -0600 Subject: [PATCH 08/16] Run the cuml-accel upstream tests on oldest and intermediate deps. UMAP-learn is not yet compatible with scikit-learn 1.8. 
--- .github/workflows/test.yaml | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f84f0fa456..83177fb695 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -95,8 +95,19 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: "ci/test_python_cuml_accel_upstream.sh" - # Select amd64 and one job per major CUDA version with the latest CUDA and Python versions - matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([.CUDA_VER,.PY_VER]|map(split(".")|map(tonumber)))) + # Select amd64 with oldest deps, plus an "intermediate" entry based on oldest + matrix_filter: '( + map(select(.ARCH == "amd64")) + | map(select(.DEPENDENCIES == "oldest")) + | sort_by(.PY_VER) + ) + + + ( + map(select(.ARCH == "amd64")) + | map(select(.DEPENDENCIES == "oldest")) + | sort_by(.PY_VER) + | map(.DEPENDENCIES = "intermediate") + )' sccache-dist-token-secret-name: GIST_REPO_READ_ORG_GITHUB_TOKEN wheel-tests-cuml: secrets: inherit From 83e42877ef00195ac962bc1fdbdc09b1332b1a86 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 12:44:43 -0600 Subject: [PATCH 09/16] Revert later: Expand xfail-manager to support "set" function. 
--- .../upstream/xfail_manager.py | 252 ++++++++++++++++++ 1 file changed, 252 insertions(+) diff --git a/python/cuml/cuml_accel_tests/upstream/xfail_manager.py b/python/cuml/cuml_accel_tests/upstream/xfail_manager.py index 15e0bb000b..d98d85e9de 100755 --- a/python/cuml/cuml_accel_tests/upstream/xfail_manager.py +++ b/python/cuml/cuml_accel_tests/upstream/xfail_manager.py @@ -14,9 +14,11 @@ - Deterministic formatting and sorting of xfail lists - Validation of group conditions - Cleanup of empty groups +- Batch modification of test metadata CLI Commands: - format: Apply consistent formatting and sorting +- set: Modify metadata (reason, condition, marker, strict) for specified tests The tool ensures xfail lists remain maintainable and produce clean diffs in version control systems. @@ -171,6 +173,135 @@ def __init__(self, xfail_list_path: Optional[Union[str, Path]] = None): if xfail_list_path: self.load(xfail_list_path) + def find_test(self, test_id: str) -> Optional[XfailGroup]: + """Find the group containing a specific test. + + Args: + test_id: The test ID to search for + + Returns: + The XfailGroup containing the test, or None if not found + """ + for group in self.groups: + if test_id in group.tests: + return group + return None + + def remove_test(self, test_id: str) -> bool: + """Remove a test from its current group. + + Args: + test_id: The test ID to remove + + Returns: + True if the test was found and removed, False otherwise + """ + for group in self.groups: + if test_id in group.tests: + group.tests.remove(test_id) + return True + return False + + def set_test_metadata( + self, + test_ids: List[str], + reason: Optional[str] = None, + condition: Optional[str] = None, + marker: Optional[str] = None, + strict: Optional[bool] = None, + run: Optional[bool] = None, + ) -> Dict[str, Any]: + """Set metadata for specified tests, moving them to appropriate groups. + + For each test, this method: + 1. 
Finds the test's current group (if any) to get default metadata + 2. Overrides only the metadata options that were explicitly provided + 3. Removes the test from its original group + 4. Adds the test to a group with the new metadata + + Args: + test_ids: List of test IDs to modify + reason: New reason (if provided) + condition: New condition (if provided) + marker: New marker (if provided) + strict: New strict value (if provided) + run: New run value (if provided) + + Returns: + Dictionary with 'moved', 'added', and 'not_found' lists + """ + results = {"moved": [], "added": [], "not_found": []} + + for test_id in test_ids: + test_id = QuoteTestID(test_id) + + # Find current group for this test + current_group = self.find_test(test_id) + + if current_group: + # Get defaults from current group + new_reason = ( + reason if reason is not None else current_group.reason + ) + new_condition = ( + condition + if condition is not None + else current_group.condition + ) + new_marker = ( + marker if marker is not None else current_group.marker + ) + new_strict = ( + strict if strict is not None else current_group.strict + ) + new_run = run if run is not None else current_group.run + + # Remove from current group + self.remove_test(test_id) + results["moved"].append(test_id) + else: + # Test not found - use provided values or defaults + if reason is None: + results["not_found"].append(test_id) + continue + + new_reason = reason + new_condition = condition + new_marker = marker + new_strict = strict if strict is not None else True + new_run = run if run is not None else True + results["added"].append(test_id) + + # Find or create a group with matching metadata + target_group = None + for group in self.groups: + if ( + group.reason == new_reason + and group.condition == new_condition + and group.marker == new_marker + and group.strict == new_strict + and group.run == new_run + ): + target_group = group + break + + if target_group is None: + # Create new group + target_group = 
XfailGroup( + reason=new_reason, + tests=[], + strict=new_strict, + condition=new_condition, + run=new_run, + marker=new_marker, + ) + self.groups.append(target_group) + + # Add test to target group + target_group.tests.append(test_id) + + return results + def load(self, xfail_list_path: Union[str, Path]) -> None: """Load xfail list from YAML file.""" path = Path(xfail_list_path) @@ -358,6 +489,86 @@ def _format_single_file(xfail_path, args): return 1 +def cmd_set(args): + """Set metadata for specified tests in the xfail list.""" + xfail_path = Path(args.xfail_list) + + if not xfail_path.exists(): + print( + f"Error: Xfail list file not found: {xfail_path}", file=sys.stderr + ) + return 1 + + # Validate that at least one metadata option is provided + has_metadata = any( + [ + args.reason is not None, + args.condition is not None, + args.marker is not None, + args.strict is not None, + ] + ) + + if not has_metadata: + print( + "Error: At least one of --reason, --condition, --marker, " + "or --strict must be provided", + file=sys.stderr, + ) + return 1 + + try: + manager = XfailManager(xfail_path) + + results = manager.set_test_metadata( + test_ids=args.test_ids, + reason=args.reason, + condition=args.condition, + marker=args.marker, + strict=args.strict, + ) + + # Report results + if results["moved"]: + print(f"Moved {len(results['moved'])} test(s) to new group:") + for test_id in results["moved"]: + print(f" {test_id}") + + if results["added"]: + print(f"Added {len(results['added'])} new test(s):") + for test_id in results["added"]: + print(f" {test_id}") + + if results["not_found"]: + print( + f"Warning: {len(results['not_found'])} test(s) not found " + "(--reason required to add new tests):", + file=sys.stderr, + ) + for test_id in results["not_found"]: + print(f" {test_id}", file=sys.stderr) + + # Clean up empty groups + manager.cleanup_empty_groups() + + # Validate and save + validation_errors = manager.validate_conditions() + if validation_errors: + 
print("Validation errors:", file=sys.stderr) + for error in validation_errors: + print(f" {error}", file=sys.stderr) + return 1 + + manager.save(xfail_path) + print(f"Updated {xfail_path}") + + return 0 if not results["not_found"] else 1 + + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + return 1 + + def main(): """Main CLI entry point.""" parser = argparse.ArgumentParser( @@ -383,6 +594,47 @@ def main(): ) format_parser.set_defaults(func=cmd_format) + # Set command + set_parser = subparsers.add_parser( + "set", + help="Set metadata for specified tests in the xfail list", + ) + set_parser.add_argument( + "xfail_list", + help="Xfail list file to modify", + ) + set_parser.add_argument( + "test_ids", + nargs="+", + metavar="TEST_ID", + help="Test IDs to modify", + ) + set_parser.add_argument( + "--reason", + help="Set the reason for the xfail group", + ) + set_parser.add_argument( + "--condition", + help="Set the condition for the xfail group (e.g., 'scikit-learn<1.8')", + ) + set_parser.add_argument( + "--marker", + help="Set the pytest marker for the xfail group", + ) + set_parser.add_argument( + "--strict", + action="store_true", + default=None, + help="Set strict=true for the xfail group", + ) + set_parser.add_argument( + "--no-strict", + action="store_false", + dest="strict", + help="Set strict=false for the xfail group", + ) + set_parser.set_defaults(func=cmd_set) + args = parser.parse_args() if not args.command: From e407f7ec2609aae7baa92aaf80cc38614ffb256d Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 12:51:50 -0600 Subject: [PATCH 10/16] Update scikit-learn xfail list for scikit-learn 1.8. 
--- .../upstream/scikit-learn/xfail-list.yaml | 344 ++++++++++++++---- 1 file changed, 283 insertions(+), 61 deletions(-) diff --git a/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml b/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml index 421305135c..1629121a09 100644 --- a/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml +++ b/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml @@ -1,3 +1,44 @@ +- reason: AUC standard deviation differs slightly with cuml.accel in sklearn 1.8 + marker: cuml_accel_bugs + condition: scikit-learn>=1.8 + tests: + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-False-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-False-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-True-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-True-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-False-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-False-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-True-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-True-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-False-None]" + - 
"sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-False-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-True-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-True-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-False-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-False-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-True-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-True-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[None-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[None-curve_kwargs1]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[single-None]" + - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[single-curve_kwargs1]" +- reason: Search CV sample weight equivalence differs with cuml.accel in sklearn 1.8 + marker: cuml_accel_bugs + condition: scikit-learn>=1.8 + tests: + - "sklearn.model_selection.tests.test_search::test_search_cv_sample_weight_equivalence[estimator0]" +- reason: Test should fail with cuml.accel + marker: cuml_accel_bugs + condition: scikit-learn<1.8 + tests: + - "sklearn.linear_model.tests.test_coordinate_descent::test_enet_coordinate_descent[Lasso-1-kwargs1]" + - 
"sklearn.linear_model.tests.test_coordinate_descent::test_warm_start_convergence" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-False-ElasticNet]" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-True-ElasticNet]" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-False-ElasticNet]" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-True-ElasticNet]" + - "sklearn.manifold.tests.test_t_sne::test_pca_initialization_not_compatible_with_sparse_input[csr_array]" + - "sklearn.manifold.tests.test_t_sne::test_pca_initialization_not_compatible_with_sparse_input[csr_matrix]" - reason: Test should fail with cuml.accel marker: cuml_accel_bugs tests: @@ -200,7 +241,6 @@ - "sklearn.linear_model.tests.test_common::test_balance_property[42-True-LogisticRegression]" - "sklearn.linear_model.tests.test_coordinate_descent::test_check_input_false" - "sklearn.linear_model.tests.test_coordinate_descent::test_elasticnet_precompute_gram_weighted_samples" - - "sklearn.linear_model.tests.test_coordinate_descent::test_enet_coordinate_descent[Lasso-1-kwargs1]" - "sklearn.linear_model.tests.test_coordinate_descent::test_enet_copy_X_False_check_input_False" - "sklearn.linear_model.tests.test_coordinate_descent::test_enet_float_precision" - "sklearn.linear_model.tests.test_coordinate_descent::test_enet_multitarget" @@ -215,7 +255,6 @@ - "sklearn.linear_model.tests.test_coordinate_descent::test_lasso_readonly_data" - "sklearn.linear_model.tests.test_coordinate_descent::test_lasso_toy" - "sklearn.linear_model.tests.test_coordinate_descent::test_lasso_zero" - - "sklearn.linear_model.tests.test_coordinate_descent::test_warm_start_convergence" - "sklearn.linear_model.tests.test_coordinate_descent::test_warm_start_convergence_with_regularizer_decrement" 
- "sklearn.linear_model.tests.test_ransac::test_perfect_horizontal_line" - "sklearn.linear_model.tests.test_ransac::test_ransac_exceed_max_skips" @@ -240,17 +279,13 @@ - "sklearn.linear_model.tests.test_sag::test_step_size_alpha_error" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_same_multiple_output_sparse_dense[coo_array]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_same_multiple_output_sparse_dense[coo_matrix]" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-False-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-False-Lasso]" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-True-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-True-Lasso]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-True-6-24-False-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-True-6-24-False-Lasso]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-True-6-24-True-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-True-6-24-True-Lasso]" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-False-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-False-Lasso]" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-True-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-6-24-True-Lasso]" - 
"sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-True-6-24-False-ElasticNet]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-True-6-24-False-Lasso]" @@ -262,8 +297,6 @@ - "sklearn.manifold.tests.test_t_sne::test_bh_match_exact" - "sklearn.manifold.tests.test_t_sne::test_binary_perplexity_stability" - "sklearn.manifold.tests.test_t_sne::test_n_iter_without_progress" - - "sklearn.manifold.tests.test_t_sne::test_pca_initialization_not_compatible_with_sparse_input[csr_array]" - - "sklearn.manifold.tests.test_t_sne::test_pca_initialization_not_compatible_with_sparse_input[csr_matrix]" - "sklearn.manifold.tests.test_t_sne::test_preserve_trustworthiness_approximately[pca-barnes_hut]" - "sklearn.manifold.tests.test_t_sne::test_preserve_trustworthiness_approximately[pca-exact]" - "sklearn.manifold.tests.test_t_sne::test_preserve_trustworthiness_approximately[random-barnes_hut]" @@ -451,11 +484,6 @@ - "sklearn.neighbors.tests.test_neighbors::test_neighbors_metrics[float64-minkowski]" - "sklearn.preprocessing.tests.test_data::test_standard_scaler_partial_fit_numerical_stability[csc_array]" - "sklearn.preprocessing.tests.test_data::test_standard_scaler_partial_fit_numerical_stability[csr_array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_matrix]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_matrix]" - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[False-isotonic]" - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[False-sigmoid]" - 
"sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[True-isotonic]" @@ -781,11 +809,6 @@ - "sklearn.model_selection.tests.test_classification_threshold::test_fit_and_score_over_thresholds_sample_weight" - "sklearn.model_selection.tests.test_classification_threshold::test_tuned_threshold_classifier_cv_zeros_sample_weights_equivalence" - "sklearn.model_selection.tests.test_validation::test_cross_val_predict_class_subset" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_matrix]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_array]" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_matrix]" - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[False-isotonic]" - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[False-sigmoid]" - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[True-isotonic]" @@ -834,31 +857,21 @@ - "sklearn.tests.test_common::test_estimators[LogisticRegression()-check_sample_weight_equivalence_on_sparse_data]" - "sklearn.tests.test_common::test_estimators[Ridge()-check_non_transformer_estimators_n_iter]" - "sklearn.utils.tests.test_estimator_checks::test_check_estimator_pairwise" -- reason: Test should fail with cuml.accel (scikit-learn 1.7) +- reason: Test should fail with cuml.accel (scikit-learn <1.8) marker: cuml_accel_bugs - condition: scikit-learn == 1.7.* + condition: scikit-learn<1.8 tests: - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-False-None]" - - 
"sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-False-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-True-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-decision_function-True-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-False-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-False-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-True-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[False-False-predict_proba-True-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-False-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-False-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-True-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-decision_function-True-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-False-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-False-curve_kwargs1]" - - 
"sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-True-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_display_plotting_from_cv_results[True-False-predict_proba-True-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[None-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[None-curve_kwargs1]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[single-None]" - - "sklearn.metrics._plot.tests.test_roc_curve_display::test_roc_curve_from_cv_results_legend_label[single-curve_kwargs1]" - - "sklearn.model_selection.tests.test_search::test_search_cv_sample_weight_equivalence[estimator0]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-array]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_array]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-log-csr_matrix]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_array]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-log-csr_matrix]" +- reason: Test should fail with cuml.accel (scikit-learn<1.6) + marker: cuml_accel_bugs + condition: scikit-learn<1.6 + tests: + - "sklearn.tests.test_common::test_check_n_features_in_after_fitting[LogisticRegression()]" + - "sklearn.tests.test_common::test_pandas_column_name_consistency[HalvingGridSearchCV(cv=2,estimator=LogisticRegression(),min_resources='smallest',param_grid={'C':[0.1,1.0]},random_state=0)0]" - reason: Test should fail with cuml.accel (scikit-learn<1.7) marker: cuml_accel_bugs condition: scikit-learn<1.5 @@ -869,12 +882,6 @@ - "sklearn.decomposition.tests.test_pca::test_sparse_pca_solver_error[42-csr_array-auto]" - 
"sklearn.decomposition.tests.test_pca::test_sparse_pca_solver_error[42-csr_matrix-auto]" - "sklearn.manifold.tests.test_t_sne::test_n_iter_used" -- reason: Test should fail with cuml.accel (scikit-learn<1.7) - marker: cuml_accel_bugs - condition: scikit-learn<1.7 - tests: - - "sklearn.tests.test_common::test_check_n_features_in_after_fitting[LogisticRegression()]" - - "sklearn.tests.test_common::test_pandas_column_name_consistency[HalvingGridSearchCV(cv=2,estimator=LogisticRegression(),min_resources='smallest',param_grid={'C':[0.1,1.0]},random_state=0)0]" - reason: Test should fail with cuml.accel (scikit-learn<1.7) marker: cuml_accel_bugs condition: scikit-learn<1.7,>=1.5.0 @@ -906,6 +913,11 @@ strict: false tests: - "sklearn.tests.test_common::test_check_n_features_in_after_fitting[SpectralEmbedding()]" +- reason: cuml raises a different error if X doesn't have expected n features + marker: cuml_accel_check_n_features_in + condition: scikit-learn==1.7.* + tests: + - "sklearn.tests.test_common::test_estimators[SVC()-check_n_features_in_after_fitting]" - reason: cuml raises a different error if X doesn't have expected n features marker: cuml_accel_check_n_features_in condition: scikit-learn>=1.6 @@ -923,7 +935,6 @@ - "sklearn.tests.test_common::test_estimators[RandomForestClassifier()-check_n_features_in_after_fitting]" - "sklearn.tests.test_common::test_estimators[RandomForestRegressor()-check_n_features_in_after_fitting]" - "sklearn.tests.test_common::test_estimators[Ridge()-check_n_features_in_after_fitting]" - - "sklearn.tests.test_common::test_estimators[SVC()-check_n_features_in_after_fitting]" - "sklearn.tests.test_common::test_estimators[SVR()-check_n_features_in_after_fitting]" - "sklearn.tests.test_common::test_estimators[TruncatedSVD()-check_n_features_in_after_fitting]" - reason: cuml doesn't set `feature_names_in_` properly @@ -955,6 +966,15 @@ tests: - "sklearn.mixture.tests.test_gaussian_mixture::test_gaussian_mixture_precisions_init_diag" - 
"sklearn.utils.tests.test_estimator_html_repr::test_show_arrow_pipeline" +- reason: Test is flaky with cuml.accel + marker: cuml_accel_flaky + condition: scikit-learn<1.8 + strict: false + tests: + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-barnes_hut]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-exact]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-barnes_hut]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-exact]" - reason: Test is flaky with cuml.accel marker: cuml_accel_flaky condition: scikit-learn>=1.6 @@ -983,10 +1003,6 @@ - "sklearn.feature_selection.tests.test_sequential::test_unsupervised_model_fit[2]" - "sklearn.feature_selection.tests.test_sequential::test_unsupervised_model_fit[3]" - "sklearn.manifold.tests.test_spectral_embedding::test_pipeline_spectral_clustering" - - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-barnes_hut]" - - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-exact]" - - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-barnes_hut]" - - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-exact]" - "sklearn.manifold.tests.test_t_sne::test_optimization_minimizes_kl_divergence" - "sklearn.manifold.tests.test_t_sne::test_uniform_grid[barnes_hut]" - "sklearn.manifold.tests.test_t_sne::test_uniform_grid[exact]" @@ -1159,6 +1175,11 @@ - "sklearn.linear_model._glm.tests.test_glm::test_linalg_warning_with_newton_solver[42]" - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_path_convergence_fail" - "sklearn.linear_model.tests.test_logistic::test_newton_cholesky_fallback_to_lbfgs[42]" +- reason: SVC input handling and validation + marker: cuml_accel_svc_estimator_checks + condition: scikit-learn<1.8 + tests: + - "sklearn.svm.tests.test_svm::test_svc_nonfinite_params" - reason: 
SVC input handling and validation marker: cuml_accel_svc_estimator_checks condition: scikit-learn>=1.6 @@ -1168,7 +1189,6 @@ - reason: SVC input handling and validation marker: cuml_accel_svc_estimator_checks tests: - - "sklearn.svm.tests.test_svm::test_svc_nonfinite_params" - "sklearn.tests.test_common::test_estimators[SVC()-check_classifier_data_not_an_array]" - "sklearn.tests.test_common::test_estimators[SVC()-check_complex_data]" - "sklearn.tests.test_common::test_estimators[SVC()-check_estimators_nan_inf]" @@ -1349,10 +1369,47 @@ - "sklearn.tests.test_common::test_estimators[TSNE()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[TSNE()-check_methods_sample_order_invariance]" - "sklearn.tests.test_common::test_estimators[TSNE()-check_methods_subset_invariance]" +- reason: Calibration temperature scaling differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[False-temperature]" + - "sklearn.tests.test_calibration::test_calibrated_classifier_cv_double_sample_weights_equivalence[True-temperature]" +- reason: Elasticnet scores attribute layout differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_scores_attribute_layout_elasticnet" - reason: Flaky deviations in n_iter_ values in cuml.accel strict: false tests: - "sklearn.cluster.tests.test_k_means::test_kmeans_elkan_results[42-1e-100-dense-blobs]" +- reason: Linear SVM sample weight handling differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVC-params0]" + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVC-params1]" + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVC-params2]" + - 
"sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVC-params3]" + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVR-params4]" + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVR-params5]" + - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[42-LinearSVR-params6]" +- reason: LinearSVC parameter validation differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[42-False-l1-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[42-False-l2-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[42-True-l1-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[42-True-l1-squared_hinge]" +- reason: LinearSVM + condition: scikit-learn<1.8 + tests: + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-squared_hinge-array]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-squared_hinge-array]" + - "sklearn.svm.tests.test_svm::test_liblinear_set_coef" + - "sklearn.svm.tests.test_svm::test_linear_svm_convergence_warnings" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[False-l1-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[False-l2-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[True-l1-hinge]" + - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[True-l1-squared_hinge]" - reason: LinearSVM condition: scikit-learn<=1.6 tests: @@ -1377,20 +1434,12 @@ - "sklearn.feature_selection.tests.test_rfe::test_rfe_wrapped_estimator[RFECV-4-importance_getter0]" - "sklearn.feature_selection.tests.test_rfe::test_rfe_wrapped_estimator[RFECV-4-regressor_.coef_]" - "sklearn.model_selection.tests.test_search::test_grid_search_no_score" - - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-multi-class-squared_hinge-array]" - - 
"sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-two-classes-squared_hinge-array]" - "sklearn.svm.tests.test_sparse::test_linearsvc[lil_array-dok_array]" - "sklearn.svm.tests.test_sparse::test_linearsvc[lil_matrix-dok_matrix]" - "sklearn.svm.tests.test_sparse::test_linearsvc_iris[csr_array]" - "sklearn.svm.tests.test_sparse::test_linearsvc_iris[csr_matrix]" - "sklearn.svm.tests.test_sparse::test_sparse_liblinear_intercept_handling" - "sklearn.svm.tests.test_svm::test_dense_liblinear_intercept_handling" - - "sklearn.svm.tests.test_svm::test_liblinear_set_coef" - - "sklearn.svm.tests.test_svm::test_linear_svm_convergence_warnings" - - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[False-l1-hinge]" - - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[False-l2-hinge]" - - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[True-l1-hinge]" - - "sklearn.svm.tests.test_svm::test_linearsvc_parameters[True-l1-squared_hinge]" - "sklearn.tests.test_calibration::test_calibration_default_estimator" - "sklearn.tests.test_calibration::test_calibration_inconsistent_prefit_n_features_in" - "sklearn.tests.test_calibration::test_calibration_multiclass[1-False-isotonic]" @@ -1420,10 +1469,12 @@ - "sklearn.tests.test_common::test_pandas_column_name_consistency[LinearSVC()]" - "sklearn.tests.test_common::test_pandas_column_name_consistency[LinearSVR()]" - reason: LinearSVM test expects exact results on small data, which isn't guaranteed + condition: scikit-learn<1.8 tests: - "sklearn.svm.tests.test_svm::test_bad_input[lil_array]" - "sklearn.svm.tests.test_svm::test_bad_input[lil_matrix]" - reason: LinearSVM tests too strict of tolerances due to differences in solvers + condition: scikit-learn<1.8 tests: - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[LinearSVC-params0]" - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[LinearSVC-params1]" @@ -1432,6 +1483,58 @@ - 
"sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[LinearSVR-params4]" - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[LinearSVR-params5]" - "sklearn.svm.tests.test_svm::test_linearsvm_liblinear_sample_weight[LinearSVR-params6]" +- reason: LogisticRegression liblinear sample weight handling differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_logisticregression_liblinear_sample_weight[42-params0]" + - "sklearn.linear_model.tests.test_logistic::test_logisticregression_liblinear_sample_weight[42-params1]" + - "sklearn.linear_model.tests.test_logistic::test_logisticregression_liblinear_sample_weight[42-params2]" +- reason: LogisticRegression missing _predict_proba_lr attribute with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_logreg_predict_proba_multinomial[42]" +- reason: LogisticRegression multiclass solvers differ with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_solvers_multiclass[True]" +- reason: LogisticRegression sample weights handling differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-lbfgs-single]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-liblinear-single]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-newton-cg-single]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-newton-cholesky-single]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-sag-single]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_regression_sample_weights[42-saga-single]" +- reason: LogisticRegressionCV 
fold coefficients differ with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_LogisticRegressionCV_on_folds" +- reason: Multinomial logistic regression class weight handling differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_multinomial_logistic_regression_string_inputs" +- reason: 'Numerical precision difference: cuML uses float32, test expects float64 precision' + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_logistic_cv[42-False]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_cv[42-True]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_cv_multinomial_score[42-neg_log_loss-multiclass_agg_list3]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_glmnet[lbfgs]" + - "sklearn.linear_model.tests.test_logistic::test_logistic_glmnet[newton-cholesky]" +- reason: Numerical tolerance issue with Lasso sparse/dense equality in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-24-6-False-Lasso]" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-24-6-True-Lasso]" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-24-6-False-Lasso]" + - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_matrix-False-24-6-True-Lasso]" +- reason: Numerical tolerance issue with t-SNE sparse input in sklearn 1.8 (flaky) + condition: scikit-learn>=1.8 + strict: false + tests: + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-random-exact]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-random-exact]" - reason: Ridge doesn't implement n_iter yet 
tests: - "sklearn.linear_model.tests.test_ridge::test_n_iter" @@ -1473,3 +1576,122 @@ - reason: The sklearn test has the error message accidentally flipped, our message is correct tests: - "sklearn.linear_model.tests.test_ridge::test_ridge_individual_penalties" +- reason: cuML TSNE barnes_hut produces poor quality embeddings with sparse input + condition: scikit-learn>=1.8 + tests: + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-random-barnes_hut]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-random-barnes_hut]" +- reason: cuML TSNE does not support sparse input with PCA initialization + condition: scikit-learn>=1.8 + tests: + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-pca-barnes_hut]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_array-pca-exact]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-pca-barnes_hut]" + - "sklearn.manifold.tests.test_t_sne::test_fit_transform_csr_matrix[csr_matrix-pca-exact]" +- reason: cuML does not emit ConvergenceWarning in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-lbfgs-lbfgs failed to converge-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-lbfgs-lbfgs failed to converge-max_iter1]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-lbfgs-lbfgs failed to converge-max_iter2]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-lbfgs-lbfgs failed to converge-max_iter3]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-liblinear-Liblinear failed to converge, increase the number of iterations.-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-liblinear-Liblinear failed to converge, increase the number of iterations.-max_iter1]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-liblinear-Liblinear 
failed to converge, increase the number of iterations.-max_iter2]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-liblinear-Liblinear failed to converge, increase the number of iterations.-max_iter3]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-newton-cg-newton-cg failed to converge.* Increase the number of iterations.-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-newton-cg-newton-cg failed to converge.* Increase the number of iterations.-max_iter1]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-newton-cg-newton-cg failed to converge.* Increase the number of iterations.-max_iter2]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-newton-cg-newton-cg failed to converge.* Increase the number of iterations.-max_iter3]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-newton-cholesky-Newton solver did not converge after [0-9]* iterations-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-sag-The max_iter was reached which means the coef_ did not converge-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-sag-The max_iter was reached which means the coef_ did not converge-max_iter1]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-sag-The max_iter was reached which means the coef_ did not converge-max_iter2]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-sag-The max_iter was reached which means the coef_ did not converge-max_iter3]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-saga-The max_iter was reached which means the coef_ did not converge-max_iter0]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-saga-The max_iter was reached which means the coef_ did not converge-max_iter1]" + - "sklearn.linear_model.tests.test_logistic::test_max_iter[42-saga-The max_iter was reached which means the coef_ did not converge-max_iter2]" + - 
"sklearn.linear_model.tests.test_logistic::test_max_iter[42-saga-The max_iter was reached which means the coef_ did not converge-max_iter3]" + - "sklearn.svm.tests.test_svm::test_linear_svm_convergence_warnings[42]" +- reason: cuML doesn't enforce sparse matrix int64 indices restriction + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_array-liblinear]" + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_array-sag]" + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_array-saga]" + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_matrix-liblinear]" + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_matrix-sag]" + - "sklearn.linear_model.tests.test_logistic::test_large_sparse_matrix[42-csr_matrix-saga]" +- reason: cuML doesn't support warm_start with newton solvers + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-1-False-newton-cg]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-1-False-newton-cholesky]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-1-True-newton-cg]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-1-True-newton-cholesky]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-inf-False-newton-cg]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-inf-False-newton-cholesky]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-inf-True-newton-cg]" + - "sklearn.linear_model.tests.test_logistic::test_warm_start_newton_solver[42-inf-True-newton-cholesky]" +- reason: cuML doesn't validate NaN input in the same way as sklearn + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_nan" +- reason: 
cuML proxy doesn't replicate sklearn warnings in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_l1_ratio_None_deprecated" + - "sklearn.linear_model.tests.test_logistic::test_l1_ratio_non_elasticnet" + - "sklearn.linear_model.tests.test_logistic::test_logisticregression_warns_with_n_jobs" + - "sklearn.linear_model.tests.test_logistic::test_lr_penalty_l1ratio_incompatible[l1-0.0]" + - "sklearn.linear_model.tests.test_logistic::test_lr_penalty_l1ratio_incompatible[l2-1.0]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_deprecated[LogisticRegression]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_none[42-lbfgs]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_none[42-newton-cg]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_none[42-newton-cholesky]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_none[42-sag]" + - "sklearn.linear_model.tests.test_logistic::test_penalty_none[42-saga]" +- reason: cuML proxy doesn't support direct writes to coef_/intercept_ attributes + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_write_parameters" +- reason: cuML uses different solver backend, doesn't enforce liblinear restrictions + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_check_solver_option[LogisticRegression]" + - "sklearn.linear_model.tests.test_logistic::test_liblinear_multiclass_raises[LogisticRegression]" +- reason: cuML uses float32 for liblinear solver, sklearn expects float64 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_dtype_match[csr_array-False-liblinear]" + - "sklearn.linear_model.tests.test_logistic::test_dtype_match[csr_array-True-liblinear]" + - "sklearn.linear_model.tests.test_logistic::test_dtype_match[csr_matrix-False-liblinear]" + - 
"sklearn.linear_model.tests.test_logistic::test_dtype_match[csr_matrix-True-liblinear]" +- reason: cuPy sparse matrices don't support int64 dtype in decision_function output + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_predict_2_classes[csr_array]" + - "sklearn.linear_model.tests.test_logistic::test_predict_2_classes[csr_matrix]" + - "sklearn.linear_model.tests.test_logistic::test_predict_3_classes[csr_array]" + - "sklearn.linear_model.tests.test_logistic::test_predict_3_classes[csr_matrix]" +- reason: cuml.accel handles sparse input differently in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.svm.tests.test_svm::test_bad_input[42-lil_array]" + - "sklearn.svm.tests.test_svm::test_bad_input[42-lil_matrix]" +- reason: cuml.accel raises RuntimeError instead of ValueError for non-finite params in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.svm.tests.test_svm::test_svc_nonfinite_params[42]" +- reason: l1_min_c calculation differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-log-csr_array]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-log-csr_matrix]" + - "sklearn.svm.tests.test_bounds::test_l1_min_c[no-intercept-squared_hinge-array]" +- reason: liblinear solver behavior differs with cuml.accel in sklearn 1.8 + condition: scikit-learn>=1.8 + tests: + - "sklearn.linear_model.tests.test_logistic::test_liblinear_dual_random_state[42]" + - "sklearn.linear_model.tests.test_logistic::test_liblinear_with_large_values" + - "sklearn.svm.tests.test_svm::test_liblinear_set_coef[42]" From 7114cf9982808f2da10bf84fbbfe6218f743a166 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 14:05:31 -0600 Subject: [PATCH 11/16] Improve output formatting in the summarize-results.py script. 
--- .../upstream/scikit-learn/test_config.yaml | 3 + .../upstream/summarize-results.py | 58 ++++++++++++------- 2 files changed, 40 insertions(+), 21 deletions(-) diff --git a/python/cuml/cuml_accel_tests/upstream/scikit-learn/test_config.yaml b/python/cuml/cuml_accel_tests/upstream/scikit-learn/test_config.yaml index e8948b3925..ae5ec75c51 100644 --- a/python/cuml/cuml_accel_tests/upstream/scikit-learn/test_config.yaml +++ b/python/cuml/cuml_accel_tests/upstream/scikit-learn/test_config.yaml @@ -1,3 +1,6 @@ # Configuration for scikit-learn tests threshold: fail_below: 85 # Minimum pass rate threshold [0-100] + +# Prefix to add to test IDs (e.g., "sklearn." for scikit-learn tests) +test_id_prefix: "sklearn." diff --git a/python/cuml/cuml_accel_tests/upstream/summarize-results.py b/python/cuml/cuml_accel_tests/upstream/summarize-results.py index 076616c4a4..77db284537 100755 --- a/python/cuml/cuml_accel_tests/upstream/summarize-results.py +++ b/python/cuml/cuml_accel_tests/upstream/summarize-results.py @@ -99,20 +99,29 @@ def parse_args(): type=int, help="Limit output to first N entries (default: no limit)", ) + parser.add_argument( + "--test-id-prefix", + type=str, + help="Prefix to add to test IDs (e.g., 'sklearn.')", + ) args = parser.parse_args() + # Load config if provided + config = load_config(args.config) if args.config is not None else {} + # Handle fail-below threshold logic: # 1. Use command line value if provided # 2. Use config value if no command line value # 3. Use default of 0.0 if neither is provided if args.fail_below is None: - if args.config is not None: - config = load_config(args.config) - args.fail_below = config.get("threshold", {}).get( - "fail_below", 0.0 - ) - else: - args.fail_below = 0.0 + args.fail_below = config.get("threshold", {}).get("fail_below", 0.0) + + # Handle test-id-prefix logic: + # 1. Use command line value if provided + # 2. Use config value if no command line value + # 3. 
Use empty string if neither is provided + if args.test_id_prefix is None: + args.test_id_prefix = config.get("test_id_prefix", "") return args @@ -241,14 +250,14 @@ def update_xfail_list(existing_list, test_results, xpassed_action="keep"): return final_groups -def get_test_results(testsuite): +def get_test_results(testsuite, prefix: str = ""): """Extract test results from testsuite. Returns dict mapping test IDs to their results. """ results = {} for testcase in testsuite.findall(".//testcase"): - test_id = QuoteTestID(get_test_id(testcase)) + test_id = QuoteTestID(get_test_id(testcase, prefix)) failure = testcase.find("failure") error = testcase.find("error") @@ -317,18 +326,23 @@ def format_table(rows, col_sep=" "): return formatted_rows -def get_test_id(testcase) -> str: +def get_test_id(testcase, prefix: str = "") -> str: classname = testcase.get("classname", "") name = testcase.get("name") - return f"{classname}::{name}" if classname else name + base_id = f"{classname}::{name}" if classname else name + # Add prefix if provided and not already present + if prefix and not base_id.startswith(prefix): + return f"{prefix}{base_id}" + return base_id -def format_traceback_output(testsuite, limit=None): +def format_traceback_output(testsuite, limit=None, prefix: str = ""): """Format test results showing tracebacks of failed tests. 
Args: testsuite: XML testsuite element containing test results limit: Optional limit on number of entries to show + prefix: Prefix to add to test IDs Returns: List of formatted strings containing test results and tracebacks @@ -347,7 +361,7 @@ def format_traceback_output(testsuite, limit=None): error = testcase.find("error") if failure is not None or error is not None: - test_id = get_test_id(testcase) + test_id = get_test_id(testcase, prefix) msg = "" details = "" @@ -364,7 +378,7 @@ def format_traceback_output(testsuite, limit=None): elif msg == "xfail": continue # Skip xfailed tests - output.append(f"\nTest: {test_id}") + output.append(f'\nTest: "{test_id}"') output.append("-" * 80) if msg: output.append(f"Error: {msg}") @@ -451,13 +465,15 @@ def main(): pass_rate = (passed / total_tests * 100) if total_tests > 0 else 0 if args.format == "traceback": - output = format_traceback_output(testsuite, args.limit) + output = format_traceback_output( + testsuite, args.limit, args.test_id_prefix + ) print("\n".join(output)) return if args.format == "xfail_list" or args.update_xfail_list: # Get test results - test_results = get_test_results(testsuite) + test_results = get_test_results(testsuite, args.test_id_prefix) if args.update_xfail_list: if not args.update_xfail_list.exists(): @@ -535,7 +551,7 @@ def main(): failure = testcase.find("failure") error = testcase.find("error") if failure is not None or error is not None: - test_id = get_test_id(testcase) + test_id = get_test_id(testcase, args.test_id_prefix) msg = "" if failure is not None and failure.get("message") is not None: msg = failure.get("message") @@ -544,9 +560,9 @@ def main(): if "XPASS" in msg: continue # Skip xpassed tests in failure list elif msg == "xfail": - print(f" {test_id} (xfail)") + print(f' "{test_id}" (xfail)') else: - print(f" {test_id}") + print(f' "{test_id}"') count += 1 # List strict xpasses in verbose mode @@ -566,8 +582,8 @@ def main(): elif error is not None and error.get("message") is 
not None: msg = error.get("message") if "XPASS(strict)" in msg: - test_id = get_test_id(testcase) - print(f" {test_id}") + test_id = get_test_id(testcase, args.test_id_prefix) + print(f' "{test_id}"') count += 1 # Check threshold From 9531bb73b25662ed8067035b1501ab0f01e4e750 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Wed, 10 Dec 2025 14:17:42 -0600 Subject: [PATCH 12/16] Enable filtering in the summarize-results.py script. --- .../upstream/summarize-results.py | 44 +++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/python/cuml/cuml_accel_tests/upstream/summarize-results.py b/python/cuml/cuml_accel_tests/upstream/summarize-results.py index 77db284537..7cfa9a4e3e 100755 --- a/python/cuml/cuml_accel_tests/upstream/summarize-results.py +++ b/python/cuml/cuml_accel_tests/upstream/summarize-results.py @@ -104,6 +104,13 @@ def parse_args(): type=str, help="Prefix to add to test IDs (e.g., 'sklearn.')", ) + parser.add_argument( + "-k", + "--filter", + type=str, + dest="filter_pattern", + help="Filter tests by ID pattern (substring match, case-insensitive)", + ) args = parser.parse_args() # Load config if provided @@ -132,6 +139,21 @@ def validate_threshold(threshold): raise ValueError("Threshold must be between 0 and 100") +def matches_filter(test_id, pattern): + """Check if test ID matches the filter pattern (case-insensitive substring). 
+ + Args: + test_id: The test ID to check + pattern: The filter pattern (substring match, case-insensitive) + + Returns: + True if pattern is None or test_id contains pattern + """ + if pattern is None: + return True + return pattern.lower() in test_id.lower() + + def load_existing_xfail_list(path): """Load existing xfail list from file.""" if not path.exists(): @@ -336,13 +358,16 @@ def get_test_id(testcase, prefix: str = "") -> str: return base_id -def format_traceback_output(testsuite, limit=None, prefix: str = ""): +def format_traceback_output( + testsuite, limit=None, prefix: str = "", filter_pattern=None +): """Format test results showing tracebacks of failed tests. Args: testsuite: XML testsuite element containing test results limit: Optional limit on number of entries to show prefix: Prefix to add to test IDs + filter_pattern: Optional pattern to filter test IDs Returns: List of formatted strings containing test results and tracebacks @@ -363,6 +388,10 @@ def format_traceback_output(testsuite, limit=None, prefix: str = ""): if failure is not None or error is not None: test_id = get_test_id(testcase, prefix) + # Apply filter + if not matches_filter(test_id, filter_pattern): + continue + msg = "" details = "" @@ -466,7 +495,7 @@ def main(): if args.format == "traceback": output = format_traceback_output( - testsuite, args.limit, args.test_id_prefix + testsuite, args.limit, args.test_id_prefix, args.filter_pattern ) print("\n".join(output)) return @@ -503,6 +532,9 @@ def main(): for test_id, result in test_results.items(): if args.limit is not None and count >= args.limit: break + # Apply filter + if not matches_filter(test_id, args.filter_pattern): + continue if result["status"] in ("fail", "xfail"): if not xfail_list: xfail_list.append( @@ -552,6 +584,9 @@ def main(): error = testcase.find("error") if failure is not None or error is not None: test_id = get_test_id(testcase, args.test_id_prefix) + # Apply filter + if not matches_filter(test_id, 
args.filter_pattern): + continue msg = "" if failure is not None and failure.get("message") is not None: msg = failure.get("message") @@ -576,13 +611,16 @@ def main(): failure = testcase.find("failure") error = testcase.find("error") if failure is not None or error is not None: + test_id = get_test_id(testcase, args.test_id_prefix) + # Apply filter + if not matches_filter(test_id, args.filter_pattern): + continue msg = "" if failure is not None and failure.get("message") is not None: msg = failure.get("message") elif error is not None and error.get("message") is not None: msg = error.get("message") if "XPASS(strict)" in msg: - test_id = get_test_id(testcase, args.test_id_prefix) print(f' "{test_id}"') count += 1 From 20a08cfae87afca6a3b23b1c247765d8012422a9 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Thu, 11 Dec 2025 13:45:28 -0600 Subject: [PATCH 13/16] revert changes to GHA config --- .github/workflows/test.yaml | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 83177fb695..f84f0fa456 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -95,19 +95,8 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: "ci/test_python_cuml_accel_upstream.sh" - # Select amd64 with oldest deps, plus an "intermediate" entry based on oldest - matrix_filter: '( - map(select(.ARCH == "amd64")) - | map(select(.DEPENDENCIES == "oldest")) - | sort_by(.PY_VER) - ) - + - ( - map(select(.ARCH == "amd64")) - | map(select(.DEPENDENCIES == "oldest")) - | sort_by(.PY_VER) - | map(.DEPENDENCIES = "intermediate") - )' + # Select amd64 and one job per major CUDA version with the latest CUDA and Python versions + matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([.CUDA_VER,.PY_VER]|map(split(".")|map(tonumber)))) sccache-dist-token-secret-name: GIST_REPO_READ_ORG_GITHUB_TOKEN wheel-tests-cuml: secrets: 
inherit From 2b910d522eae1e47cfefb4ee57e1e6fb79121ca5 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Thu, 11 Dec 2025 13:59:54 -0600 Subject: [PATCH 14/16] Skip umap-learn upstream tests for scikit-learn 1.8.*. --- .../cuml_accel_tests/upstream/umap/run-tests.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/python/cuml/cuml_accel_tests/upstream/umap/run-tests.sh b/python/cuml/cuml_accel_tests/upstream/umap/run-tests.sh index f2bc065042..3e0249b6bf 100755 --- a/python/cuml/cuml_accel_tests/upstream/umap/run-tests.sh +++ b/python/cuml/cuml_accel_tests/upstream/umap/run-tests.sh @@ -14,6 +14,21 @@ set -eu UMAP_TAG="release-0.5.7" +# Skip tests for scikit-learn >= 1.8 -- umap-learn is not compatible with scikit-learn 1.8 yet +python -c " +import sys +from packaging.version import Version +import sklearn +sys.exit( + int( + Version(sklearn.__version__) >= Version('1.8') + ) +) +" || { + echo "Skipping umap tests for scikit-learn >= 1.8" + exit 0 +} + THIS_DIRECTORY=$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P ) UMAP_REPO="${THIS_DIRECTORY}/umap-upstream" From b321c80efa67669038d86650a442bc60a28a3781 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Fri, 12 Dec 2025 16:15:54 -0600 Subject: [PATCH 15/16] Revert "Revert later: Expand xfail-manager to support "set" function." This reverts commit 83e42877ef00195ac962bc1fdbdc09b1332b1a86. 
--- .../upstream/xfail_manager.py | 252 ------------------ 1 file changed, 252 deletions(-) diff --git a/python/cuml/cuml_accel_tests/upstream/xfail_manager.py b/python/cuml/cuml_accel_tests/upstream/xfail_manager.py index d98d85e9de..15e0bb000b 100755 --- a/python/cuml/cuml_accel_tests/upstream/xfail_manager.py +++ b/python/cuml/cuml_accel_tests/upstream/xfail_manager.py @@ -14,11 +14,9 @@ - Deterministic formatting and sorting of xfail lists - Validation of group conditions - Cleanup of empty groups -- Batch modification of test metadata CLI Commands: - format: Apply consistent formatting and sorting -- set: Modify metadata (reason, condition, marker, strict) for specified tests The tool ensures xfail lists remain maintainable and produce clean diffs in version control systems. @@ -173,135 +171,6 @@ def __init__(self, xfail_list_path: Optional[Union[str, Path]] = None): if xfail_list_path: self.load(xfail_list_path) - def find_test(self, test_id: str) -> Optional[XfailGroup]: - """Find the group containing a specific test. - - Args: - test_id: The test ID to search for - - Returns: - The XfailGroup containing the test, or None if not found - """ - for group in self.groups: - if test_id in group.tests: - return group - return None - - def remove_test(self, test_id: str) -> bool: - """Remove a test from its current group. - - Args: - test_id: The test ID to remove - - Returns: - True if the test was found and removed, False otherwise - """ - for group in self.groups: - if test_id in group.tests: - group.tests.remove(test_id) - return True - return False - - def set_test_metadata( - self, - test_ids: List[str], - reason: Optional[str] = None, - condition: Optional[str] = None, - marker: Optional[str] = None, - strict: Optional[bool] = None, - run: Optional[bool] = None, - ) -> Dict[str, Any]: - """Set metadata for specified tests, moving them to appropriate groups. - - For each test, this method: - 1. 
Finds the test's current group (if any) to get default metadata - 2. Overrides only the metadata options that were explicitly provided - 3. Removes the test from its original group - 4. Adds the test to a group with the new metadata - - Args: - test_ids: List of test IDs to modify - reason: New reason (if provided) - condition: New condition (if provided) - marker: New marker (if provided) - strict: New strict value (if provided) - run: New run value (if provided) - - Returns: - Dictionary with 'moved', 'added', and 'not_found' lists - """ - results = {"moved": [], "added": [], "not_found": []} - - for test_id in test_ids: - test_id = QuoteTestID(test_id) - - # Find current group for this test - current_group = self.find_test(test_id) - - if current_group: - # Get defaults from current group - new_reason = ( - reason if reason is not None else current_group.reason - ) - new_condition = ( - condition - if condition is not None - else current_group.condition - ) - new_marker = ( - marker if marker is not None else current_group.marker - ) - new_strict = ( - strict if strict is not None else current_group.strict - ) - new_run = run if run is not None else current_group.run - - # Remove from current group - self.remove_test(test_id) - results["moved"].append(test_id) - else: - # Test not found - use provided values or defaults - if reason is None: - results["not_found"].append(test_id) - continue - - new_reason = reason - new_condition = condition - new_marker = marker - new_strict = strict if strict is not None else True - new_run = run if run is not None else True - results["added"].append(test_id) - - # Find or create a group with matching metadata - target_group = None - for group in self.groups: - if ( - group.reason == new_reason - and group.condition == new_condition - and group.marker == new_marker - and group.strict == new_strict - and group.run == new_run - ): - target_group = group - break - - if target_group is None: - # Create new group - target_group = 
XfailGroup( - reason=new_reason, - tests=[], - strict=new_strict, - condition=new_condition, - run=new_run, - marker=new_marker, - ) - self.groups.append(target_group) - - # Add test to target group - target_group.tests.append(test_id) - - return results - def load(self, xfail_list_path: Union[str, Path]) -> None: """Load xfail list from YAML file.""" path = Path(xfail_list_path) @@ -489,86 +358,6 @@ def _format_single_file(xfail_path, args): return 1 -def cmd_set(args): - """Set metadata for specified tests in the xfail list.""" - xfail_path = Path(args.xfail_list) - - if not xfail_path.exists(): - print( - f"Error: Xfail list file not found: {xfail_path}", file=sys.stderr - ) - return 1 - - # Validate that at least one metadata option is provided - has_metadata = any( - [ - args.reason is not None, - args.condition is not None, - args.marker is not None, - args.strict is not None, - ] - ) - - if not has_metadata: - print( - "Error: At least one of --reason, --condition, --marker, " - "or --strict must be provided", - file=sys.stderr, - ) - return 1 - - try: - manager = XfailManager(xfail_path) - - results = manager.set_test_metadata( - test_ids=args.test_ids, - reason=args.reason, - condition=args.condition, - marker=args.marker, - strict=args.strict, - ) - - # Report results - if results["moved"]: - print(f"Moved {len(results['moved'])} test(s) to new group:") - for test_id in results["moved"]: - print(f" {test_id}") - - if results["added"]: - print(f"Added {len(results['added'])} new test(s):") - for test_id in results["added"]: - print(f" {test_id}") - - if results["not_found"]: - print( - f"Warning: {len(results['not_found'])} test(s) not found " - "(--reason required to add new tests):", - file=sys.stderr, - ) - for test_id in results["not_found"]: - print(f" {test_id}", file=sys.stderr) - - # Clean up empty groups - manager.cleanup_empty_groups() - - # Validate and save - validation_errors = manager.validate_conditions() - if validation_errors: - 
print("Validation errors:", file=sys.stderr) - for error in validation_errors: - print(f" {error}", file=sys.stderr) - return 1 - - manager.save(xfail_path) - print(f"Updated {xfail_path}") - - return 0 if not results["not_found"] else 1 - - except Exception as e: - print(f"Error: {e}", file=sys.stderr) - return 1 - - def main(): """Main CLI entry point.""" parser = argparse.ArgumentParser( @@ -594,47 +383,6 @@ def main(): ) format_parser.set_defaults(func=cmd_format) - # Set command - set_parser = subparsers.add_parser( - "set", - help="Set metadata for specified tests in the xfail list", - ) - set_parser.add_argument( - "xfail_list", - help="Xfail list file to modify", - ) - set_parser.add_argument( - "test_ids", - nargs="+", - metavar="TEST_ID", - help="Test IDs to modify", - ) - set_parser.add_argument( - "--reason", - help="Set the reason for the xfail group", - ) - set_parser.add_argument( - "--condition", - help="Set the condition for the xfail group (e.g., 'scikit-learn<1.8')", - ) - set_parser.add_argument( - "--marker", - help="Set the pytest marker for the xfail group", - ) - set_parser.add_argument( - "--strict", - action="store_true", - default=None, - help="Set strict=true for the xfail group", - ) - set_parser.add_argument( - "--no-strict", - action="store_false", - dest="strict", - help="Set strict=false for the xfail group", - ) - set_parser.set_defaults(func=cmd_set) - args = parser.parse_args() if not args.command: From 2a3316a63774fc0a48152fe10a39085d9a03aaf8 Mon Sep 17 00:00:00 2001 From: Simon Adorf Date: Fri, 12 Dec 2025 16:16:16 -0600 Subject: [PATCH 16/16] Revert changes to python/cuml/cuml_accel_tests/upstream/summarize-results.py --- .../upstream/scikit-learn/test_config.yaml | 3 - .../upstream/summarize-results.py | 96 ++++--------------- 2 files changed, 21 insertions(+), 78 deletions(-) diff --git a/python/cuml/cuml_accel_tests/upstream/scikit-learn/test_config.yaml b/python/cuml/cuml_accel_tests/upstream/scikit-learn/test_config.yaml 
index ae5ec75c51..e8948b3925 100644 --- a/python/cuml/cuml_accel_tests/upstream/scikit-learn/test_config.yaml +++ b/python/cuml/cuml_accel_tests/upstream/scikit-learn/test_config.yaml @@ -1,6 +1,3 @@ # Configuration for scikit-learn tests threshold: fail_below: 85 # Minimum pass rate threshold [0-100] - -# Prefix to add to test IDs (e.g., "sklearn." for scikit-learn tests) -test_id_prefix: "sklearn." diff --git a/python/cuml/cuml_accel_tests/upstream/summarize-results.py b/python/cuml/cuml_accel_tests/upstream/summarize-results.py index 7cfa9a4e3e..076616c4a4 100755 --- a/python/cuml/cuml_accel_tests/upstream/summarize-results.py +++ b/python/cuml/cuml_accel_tests/upstream/summarize-results.py @@ -99,36 +99,20 @@ def parse_args(): type=int, help="Limit output to first N entries (default: no limit)", ) - parser.add_argument( - "--test-id-prefix", - type=str, - help="Prefix to add to test IDs (e.g., 'sklearn.')", - ) - parser.add_argument( - "-k", - "--filter", - type=str, - dest="filter_pattern", - help="Filter tests by ID pattern (substring match, case-insensitive)", - ) args = parser.parse_args() - # Load config if provided - config = load_config(args.config) if args.config is not None else {} - # Handle fail-below threshold logic: # 1. Use command line value if provided # 2. Use config value if no command line value # 3. Use default of 0.0 if neither is provided if args.fail_below is None: - args.fail_below = config.get("threshold", {}).get("fail_below", 0.0) - - # Handle test-id-prefix logic: - # 1. Use command line value if provided - # 2. Use config value if no command line value - # 3. 
Use empty string if neither is provided - if args.test_id_prefix is None: - args.test_id_prefix = config.get("test_id_prefix", "") + if args.config is not None: + config = load_config(args.config) + args.fail_below = config.get("threshold", {}).get( + "fail_below", 0.0 + ) + else: + args.fail_below = 0.0 return args @@ -139,21 +123,6 @@ def validate_threshold(threshold): raise ValueError("Threshold must be between 0 and 100") -def matches_filter(test_id, pattern): - """Check if test ID matches the filter pattern (case-insensitive substring). - - Args: - test_id: The test ID to check - pattern: The filter pattern (substring match, case-insensitive) - - Returns: - True if pattern is None or test_id contains pattern - """ - if pattern is None: - return True - return pattern.lower() in test_id.lower() - - def load_existing_xfail_list(path): """Load existing xfail list from file.""" if not path.exists(): @@ -272,14 +241,14 @@ def update_xfail_list(existing_list, test_results, xpassed_action="keep"): return final_groups -def get_test_results(testsuite, prefix: str = ""): +def get_test_results(testsuite): """Extract test results from testsuite. Returns dict mapping test IDs to their results. 
""" results = {} for testcase in testsuite.findall(".//testcase"): - test_id = QuoteTestID(get_test_id(testcase, prefix)) + test_id = QuoteTestID(get_test_id(testcase)) failure = testcase.find("failure") error = testcase.find("error") @@ -348,26 +317,18 @@ def format_table(rows, col_sep=" "): return formatted_rows -def get_test_id(testcase, prefix: str = "") -> str: +def get_test_id(testcase) -> str: classname = testcase.get("classname", "") name = testcase.get("name") - base_id = f"{classname}::{name}" if classname else name - # Add prefix if provided and not already present - if prefix and not base_id.startswith(prefix): - return f"{prefix}{base_id}" - return base_id + return f"{classname}::{name}" if classname else name -def format_traceback_output( - testsuite, limit=None, prefix: str = "", filter_pattern=None -): +def format_traceback_output(testsuite, limit=None): """Format test results showing tracebacks of failed tests. Args: testsuite: XML testsuite element containing test results limit: Optional limit on number of entries to show - prefix: Prefix to add to test IDs - filter_pattern: Optional pattern to filter test IDs Returns: List of formatted strings containing test results and tracebacks @@ -386,11 +347,7 @@ def format_traceback_output( error = testcase.find("error") if failure is not None or error is not None: - test_id = get_test_id(testcase, prefix) - - # Apply filter - if not matches_filter(test_id, filter_pattern): - continue + test_id = get_test_id(testcase) msg = "" details = "" @@ -407,7 +364,7 @@ def format_traceback_output( elif msg == "xfail": continue # Skip xfailed tests - output.append(f'\nTest: "{test_id}"') + output.append(f"\nTest: {test_id}") output.append("-" * 80) if msg: output.append(f"Error: {msg}") @@ -494,15 +451,13 @@ def main(): pass_rate = (passed / total_tests * 100) if total_tests > 0 else 0 if args.format == "traceback": - output = format_traceback_output( - testsuite, args.limit, args.test_id_prefix, args.filter_pattern 
- ) + output = format_traceback_output(testsuite, args.limit) print("\n".join(output)) return if args.format == "xfail_list" or args.update_xfail_list: # Get test results - test_results = get_test_results(testsuite, args.test_id_prefix) + test_results = get_test_results(testsuite) if args.update_xfail_list: if not args.update_xfail_list.exists(): @@ -532,9 +487,6 @@ def main(): for test_id, result in test_results.items(): if args.limit is not None and count >= args.limit: break - # Apply filter - if not matches_filter(test_id, args.filter_pattern): - continue if result["status"] in ("fail", "xfail"): if not xfail_list: xfail_list.append( @@ -583,10 +535,7 @@ def main(): failure = testcase.find("failure") error = testcase.find("error") if failure is not None or error is not None: - test_id = get_test_id(testcase, args.test_id_prefix) - # Apply filter - if not matches_filter(test_id, args.filter_pattern): - continue + test_id = get_test_id(testcase) msg = "" if failure is not None and failure.get("message") is not None: msg = failure.get("message") @@ -595,9 +544,9 @@ def main(): if "XPASS" in msg: continue # Skip xpassed tests in failure list elif msg == "xfail": - print(f' "{test_id}" (xfail)') + print(f" {test_id} (xfail)") else: - print(f' "{test_id}"') + print(f" {test_id}") count += 1 # List strict xpasses in verbose mode @@ -611,17 +560,14 @@ def main(): failure = testcase.find("failure") error = testcase.find("error") if failure is not None or error is not None: - test_id = get_test_id(testcase, args.test_id_prefix) - # Apply filter - if not matches_filter(test_id, args.filter_pattern): - continue msg = "" if failure is not None and failure.get("message") is not None: msg = failure.get("message") elif error is not None and error.get("message") is not None: msg = error.get("message") if "XPASS(strict)" in msg: - print(f' "{test_id}"') + test_id = get_test_id(testcase) + print(f" {test_id}") count += 1 # Check threshold