rapidsai · rapids-bot · Apr 25, 2025 · Apr 18, 2025 · Apr 22, 2025 · Apr 22, 2025
@@ -2,7 +2,7 @@
 # Copyright (c) 2025, NVIDIA CORPORATION.
 
 # Support invoking test script outside the script directory
-cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../
+cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ || exit 1
 
 # Common setup steps shared by Python test jobs
 source ./ci/test_python_common.sh
@@ -15,12 +15,14 @@ set +e
 rapids-logger "Running scikit-learn tests with cuML acceleration"
 
 # Run the tests
-./ci/accel/scikit-learn-tests/run-tests.sh \
-    --junitxml="${RAPIDS_TESTS_DIR}/junit-cuml-accel-scikit-learn.xml" || true
+./python/cuml/cuml/accel/tests/scikit-learn/run-tests.sh \
+    --numprocesses=8 \
+    --dist=worksteal \
+    --junitxml="${RAPIDS_TESTS_DIR}/junit-cuml-accel-scikit-learn.xml"
 
 # Analyze results and check pass rate threshold
 rapids-logger "Analyzing test results"
-./ci/accel/scikit-learn-tests/summarize-results.sh \
+./python/cuml/cuml/accel/tests/scikit-learn/summarize-results.py \
     --fail-below 80 \
     "${RAPIDS_TESTS_DIR}/junit-cuml-accel-scikit-learn.xml"
 

@@ -17,12 +17,18 @@
 from cuml.accel.core import enabled, install
 from cuml.accel.estimator_proxy import is_proxy
 from cuml.accel.magics import load_ipython_extension
-from cuml.accel.pytest_plugin import pytest_load_initial_conftests
+from cuml.accel.pytest_plugin import (
+    pytest_addoption,
+    pytest_collection_modifyitems,
+    pytest_load_initial_conftests,
+)
 
 __all__ = (
     "enabled",
     "install",
     "is_proxy",
     "load_ipython_extension",
     "pytest_load_initial_conftests",
+    "pytest_collection_modifyitems",
+    "pytest_addoption",
 )
@@ -14,6 +14,13 @@
 # limitations under the License.
 #
 
+from collections import defaultdict
+from importlib.metadata import version
+from pathlib import Path
+
+import yaml
+from packaging.requirements import Requirement
+
 from cuml.accel.core import install
 
 
@@ -26,3 +33,89 @@ def pytest_load_initial_conftests(early_config, parser, args):
         raise RuntimeError(
             "An existing plugin has already loaded sklearn. Interposing failed."
         )
+
+
+def pytest_addoption(parser):
+    """Register the ``--xfail-list`` command line option with pytest.
+
+    Args:
+        parser: The pytest argument parser the option is added to.
+    """
+    option_help = (
+        "Path to YAML file containing list of test IDs to mark as xfail"
+    )
+    parser.addoption("--xfail-list", action="store", help=option_help)
+
+
+def create_version_condition(condition_str: str) -> bool:
+    """Evaluate a version condition immediately.
+
+    Despite its name, this function does not build a deferred condition
+    object; it checks the installed package version right away and returns
+    the result.
+
+    Args:
+        condition_str: String in format 'package[comparison]version'
+                      For example:
+                      - 'scikit-learn>=1.5.2'
+                      - 'numpy<2.0.0'
+                      - 'pandas==2.1.0'
+                      An empty (falsy) value means "no condition".
+
+    Returns:
+        bool: True if the condition is met or no condition was given.
+              False if the condition is not met, or if it cannot be
+              evaluated (for example because the string cannot be parsed
+              or the package metadata cannot be found).
+    """
+    if not condition_str:
+        return True
+
+    try:
+        req = Requirement(condition_str)
+        installed_version = version(req.name)
+        # Pre-release and development builds (e.g. a nightly such as
+        # '1.7.dev0') are excluded by default when matching a specifier,
+        # which would make every condition evaluate to False for them.
+        # Opt in explicitly so they are compared like any other version.
+        return req.specifier.contains(installed_version, prereleases=True)
+    except Exception:
+        # Best effort: a condition that cannot be evaluated is treated as
+        # "not met", so the corresponding xfail marker stays inactive.
+        return False
+
+
+def pytest_collection_modifyitems(config, items):
+    """Apply xfail markers to tests listed in the xfail list file.
+
+    The file given via ``--xfail-list`` must contain a YAML list of
+    dictionaries. Each entry requires an ``id`` of the form
+    ``<module name>::<test name>`` and may provide ``reason``, ``strict``
+    (default: True) and ``condition`` (a version requirement such as
+    ``scikit-learn>=1.5.2``). Several entries may share the same ``id``;
+    one marker is added per entry.
+
+    Args:
+        config: The pytest config object used to look up the option.
+        items: Collected test items; matching items are marked in place.
+
+    Raises:
+        ValueError: If the file does not exist, is not a list, or contains
+            an entry that is not a dictionary or lacks an ``id`` field.
+    """
+    # Import pytest lazily to avoid requiring it for normal cuml usage.
+    # pytest is only needed when running tests.
+    import pytest
+
+    xfail_list_path = config.getoption("xfail_list")
+    if not xfail_list_path:
+        return
+
+    xfail_list_path = Path(xfail_list_path)
+    if not xfail_list_path.exists():
+        raise ValueError(f"Xfail list file not found: {xfail_list_path}")
+
+    # Read as UTF-8 explicitly so parsing does not depend on the locale.
+    xfail_list = yaml.safe_load(xfail_list_path.read_text(encoding="utf-8"))
+
+    if not isinstance(xfail_list, list):
+        raise ValueError("Xfail list must be a list of test entries")
+
+    # Convert list of dicts into dict mapping test IDs to lists of keyword
+    # arguments for ``pytest.mark.xfail``.
+    xfail_configs = defaultdict(list)
+    for entry in xfail_list:
+        if not isinstance(entry, dict):
+            raise ValueError("Xfail list entry must be a dictionary")
+        if "id" not in entry:
+            raise ValueError("Xfail list entry must contain an 'id' field")
+
+        condition = True
+        if "condition" in entry:
+            condition = create_version_condition(entry["condition"])
+
+        # Named ``marker_kwargs`` (not ``config``) so the pytest config
+        # parameter of this hook is not shadowed.
+        marker_kwargs = {
+            "reason": entry.get("reason", "Test listed in xfail list"),
+            "strict": entry.get("strict", True),
+            "condition": condition,
+        }
+        xfail_configs[entry["id"]].append(marker_kwargs)
+
+    for item in items:
+        # NOTE(review): not every collected item necessarily exposes a
+        # Python module (doctest items, for instance, may not). Such items
+        # cannot match a "module::name" ID, so they are skipped.
+        module = getattr(item, "module", None)
+        if module is None:
+            continue
+
+        test_id = f"{module.__name__}::{item.name}"
+        # ``.get`` avoids creating empty defaultdict entries for misses.
+        for marker_kwargs in xfail_configs.get(test_id, ()):
+            item.add_marker(pytest.mark.xfail(**marker_kwargs))
@@ -0,0 +1,109 @@
+# scikit-learn Acceleration Tests
+
+This suite provides infrastructure to run and analyze tests for scikit-learn with cuML acceleration support.
+
+## Components
+
+- `run-tests.sh`
+  Executes scikit-learn tests using GPU-accelerated paths. Any arguments passed to the script are forwarded directly to pytest.
+
+  Example usage:
+  ```bash
+  ./run-tests.sh                     # Run all tests
+  ./run-tests.sh -v -k test_kmeans   # Run specific test with verbosity
+  ./run-tests.sh -x --pdb            # Stop on first failure and debug
+  ```
+
+- `summarize-results.py`
+  Analyzes test results from an XML report file and prints a summary or generates an xfail list.
+  Options:
+  - `-v, --verbose`          : Display detailed failure information
+  - `-f, --fail-below VALUE` : Set a minimum pass rate threshold (0-100)
+  - `--format FORMAT`        : Output format (summary or xfail_list)
+  - `--update-xfail-list PATH` : Path to existing xfail list to update
+  - `-i, --in-place`        : Update the xfail list file in place
+  - `--xpassed ACTION`      : How to handle XPASS tests (keep/remove/mark-flaky)
+
+## Usage
+
+### 1. Run tests and generate report
+Run tests and save the report:
+```bash
+./run-tests.sh --junitxml=report.xml
+```
+
+**Tip**: Run tests in parallel with `-n auto` to use all available CPU cores:
+```bash
+./run-tests.sh --junitxml=report.xml -n auto
+```
+
+### 2. Analyze results
+Generate a summary from the report:
+```bash
+./summarize-results.py -v -f 80 report.xml
+```
+
+## Xfail List
+
+The xfail list (`xfail-list.yaml`) is used to mark tests that are expected to fail. This is useful for:
+- Tracking known issues
+- Managing test failures during development
+- Handling version-specific test failures
+- Managing flaky tests that occasionally fail
+
+### Automatic Usage
+The `run-tests.sh` script automatically uses an `xfail-list.yaml` file if present in the same directory.
+
+### Generating an Xfail List
+The `summarize-results.py` script provides several ways to manage the xfail list:
+
+1. Generate a new xfail list from test results:
+```bash
+./summarize-results.py --format=xfail_list report.xml > xfail-list.yaml
+```
+
+2. Update an existing xfail list (in place):
+```bash
+./summarize-results.py --update-xfail-list=xfail-list.yaml --in-place report.xml
+```
+
+The script handles XPASS tests in one of three ways (controlled by `--xpassed`):
+- `keep`: Preserve all xpassed tests in the list (default)
+- `remove`: Remove xpassed tests from the list
+- `mark-flaky`: Convert strict xpassed tests to non-strict (flaky)
+
+Example with all options:
+```bash
+./summarize-results.py --update-xfail-list=xfail-list.yaml --in-place --xpassed=mark-flaky report.xml
+```
+
+### Format
+The xfail list is a YAML file containing test IDs to mark as xfail. Each entry can include:
+- `id`: Test ID in format "module::test_name", including any parametrization suffix (e.g. `test_name[param]`)
+- `reason`: Optional reason for xfail (default: "Test listed in xfail list")
+- `strict`: Whether an unexpected pass (XPASS) is reported as a failure (default: true)
+- `condition`: Optional version requirement (e.g., "scikit-learn>=1.5.2")
+
+Example:
+```yaml
+- id: "sklearn.linear_model.tests.test_logistic::test_logistic_regression"
+  reason: "Known issue with sparse inputs"
+  strict: true
+- id: "sklearn.cluster.tests.test_k_means::test_kmeans_convergence[42-elkan]"
+  condition: "scikit-learn<1.5.2"
+  reason: "Unsupported hyperparameter for older scikit-learn version."
+- id: "sklearn.ensemble.tests.test_forest::test_random_forest_classifier"
+  reason: "Flaky test due to random seed sensitivity"
+  strict: false
+```
+
+**Note on `strict: false`**:
+The `strict` flag should be set to `true` by default. Use `strict: false` only for:
+- Tests that are genuinely non-deterministic (e.g., due to floating-point arithmetic)
+- Tests that fail intermittently due to external factors (e.g., network timeouts)
+- Tests that are known to be flaky but cannot be fixed immediately
+
+Ideally, each use of `strict: false` should include:
+- A clear explanation of why the test is non-deterministic
+- A plan to fix the underlying issue
+- Regular review to ensure the flag is still necessary