rapidsai · betatim · Mar 14, 2025 · Mar 14, 2025 · Mar 14, 2025 · Mar 18, 2025
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 
 set -euo pipefail
 
@@ -13,7 +13,7 @@ rapids-logger "Generate Python testing dependencies"
 rapids-dependency-file-generator \
   --output conda \
   --file-key test_python \
-  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" \
+  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
   --prepend-channel "${CPP_CHANNEL}" \
   --prepend-channel "${PYTHON_CHANNEL}" | tee env.yaml
 

@@ -8,10 +8,15 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
 RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist
 RAPIDS_PY_WHEEL_NAME="libcuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist
 
+# Generate pip constraints; they limit the versions of the dependencies
+# that can be installed later on when installing the wheel.
+rapids-generate-pip-constraints test_python ./constraints.txt
+
 # echo to expand wildcard before adding `[extra]` requires for pip
 rapids-pip-retry install \
   ./dist/libcuml*.whl \
-  "$(echo ./dist/cuml*.whl)[test]"
+  "$(echo ./dist/cuml*.whl)[test]" \
+  --constraint ./constraints.txt
 
 RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
 mkdir -p "${RAPIDS_TESTS_DIR}"

@@ -47,7 +47,7 @@ dependencies:
 - ninja
 - nltk
 - numba>=0.59.1,<0.61.0a0
-- numpy>=1.23,<3.0a0
+- numpy>=1.24,<3.0a0
 - numpydoc
 - nvcc_linux-aarch64=11.8
 - packaging
@@ -67,7 +67,7 @@ dependencies:
 - recommonmark
 - rmm==25.6.*,>=0.0.0a0
 - scikit-build-core>=0.10.0
-- scikit-learn==1.5.*
+- scikit-learn>=1.3.2
 - scipy>=1.8.0
 - seaborn
 - sphinx-copybutton

@@ -47,7 +47,7 @@ dependencies:
 - ninja
 - nltk
 - numba>=0.59.1,<0.61.0a0
-- numpy>=1.23,<3.0a0
+- numpy>=1.24,<3.0a0
 - numpydoc
 - nvcc_linux-64=11.8
 - packaging
@@ -67,7 +67,7 @@ dependencies:
 - recommonmark
 - rmm==25.6.*,>=0.0.0a0
 - scikit-build-core>=0.10.0
-- scikit-learn==1.5.*
+- scikit-learn>=1.3.2
 - scipy>=1.8.0
 - seaborn
 - sphinx-copybutton

@@ -44,7 +44,7 @@ dependencies:
 - ninja
 - nltk
 - numba>=0.59.1,<0.61.0a0
-- numpy>=1.23,<3.0a0
+- numpy>=1.24,<3.0a0
 - numpydoc
 - packaging
 - pydata-sphinx-theme!=0.14.2
@@ -63,7 +63,7 @@ dependencies:
 - recommonmark
 - rmm==25.6.*,>=0.0.0a0
 - scikit-build-core>=0.10.0
-- scikit-learn==1.5.*
+- scikit-learn>=1.3.2
 - scipy>=1.8.0
 - seaborn
 - sphinx-copybutton

@@ -44,7 +44,7 @@ dependencies:
 - ninja
 - nltk
 - numba>=0.59.1,<0.61.0a0
-- numpy>=1.23,<3.0a0
+- numpy>=1.24,<3.0a0
 - numpydoc
 - packaging
 - pydata-sphinx-theme!=0.14.2
@@ -63,7 +63,7 @@ dependencies:
 - recommonmark
 - rmm==25.6.*,>=0.0.0a0
 - scikit-build-core>=0.10.0
-- scikit-learn==1.5.*
+- scikit-learn>=1.3.2
 - scipy>=1.8.0
 - seaborn
 - sphinx-copybutton

@@ -268,13 +268,14 @@ dependencies:
           - dask-cuda==25.6.*,>=0.0.0a0
           - joblib>=0.11
           - numba>=0.59.1,<0.61.0a0
-          - numpy>=1.23,<3.0a0
+          - numpy>=1.24,<3.0a0
             # TODO: Is scipy really a hard dependency, or should
             # we make it optional (i.e. an extra for pip
             # installation/run_constrained for conda)?
           - scipy>=1.8.0
           - packaging
           - rapids-dask-dependency==25.6.*,>=0.0.0a0
+          - &scikit_learn scikit-learn>=1.3.2
           - *treelite
       - output_types: requirements
         packages:
@@ -439,7 +440,7 @@ dependencies:
           # https://github.com/pydata/pydata-sphinx-theme/issues/1539
           - pydata-sphinx-theme!=0.14.2
           - recommonmark
-          - &scikit_learn scikit-learn==1.5.*
+          - *scikit_learn
           - sphinx<8.2.0
           - sphinx-copybutton
           - sphinx-markdown-tables
@@ -476,6 +477,16 @@ dependencies:
         packages:
           - *cmake_ver
   test_python:
+    specific:
+      - output_types: [conda, requirements]
+        matrices:
+          - matrix: {dependencies: "oldest"}
+            packages:
+              - scikit-learn==1.3.2
+              - scipy==1.8.0
+              - numpy==1.24
+          - matrix:
+            packages:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:

@@ -23,11 +23,9 @@
 import functools
 import timeit
 import numbers
-from cuml.internals.import_utils import has_sklearn
 
-if has_sklearn():
-    from sklearn.base import clone
-    from sklearn.utils import Bunch
+from sklearn.base import clone
+from sklearn.utils import Bunch
 from contextlib import contextmanager
 from collections import defaultdict
 import warnings
@@ -561,9 +559,6 @@ def __init__(self,
                  n_jobs=None,
                  transformer_weights=None,
                  verbose=False):
-        if not has_sklearn():
-            raise ImportError("Scikit-learn is needed to use the "
-                              "Column Transformer")
         if not transformers:
             warnings.warn('Transformers are required')
         self.transformers = transformers

@@ -60,7 +60,14 @@
 from cuml.internals.safe_imports import cpu_only_import
 cpu_np = cpu_only_import('numpy')
 np = gpu_only_import('cupy')
-resample = cpu_only_import_from('sklearn.utils._indexing', 'resample')
+
+# Before scikit-learn 1.5 (e.g. in 1.4.x) the `resample` function lived in
+# `sklearn.utils`; it was moved to `sklearn.utils._indexing` in 1.5.
+try:
+    resample = cpu_only_import_from('sklearn.utils._indexing', 'resample')
+except ModuleNotFoundError:
+    resample = cpu_only_import_from('sklearn.utils', 'resample')
+
 sparse = gpu_only_import_from('cupyx.scipy', 'sparse')
 stats = cpu_only_import_from('scipy', 'stats')
 

@@ -314,6 +314,17 @@ class KMeans(UniversalBase,
                                       else None),
                     check_dtype=[np.float32, np.float64]
                 )
+    """
+    @classmethod
+    def _hyperparam_translator(cls, **kwargs):
+        kwargs, gpuaccel = super()._hyperparam_translator(**kwargs)
+
+        if "n_init" in kwargs:
+            if Version(sklearn.__version__) == Version('1.3.2'):
+                if kwargs['n_init'] == 'warn':
+                    kwargs['n_init'] = 10
+
+        return kwargs, gpuaccel"""
 
     @generate_docstring()
     @enable_device_interop

@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,8 +13,9 @@
 # limitations under the License.
 #
 
+from sklearn.utils.random import sample_without_replacement
+
 import cuml.internals
-from cuml.internals.import_utils import has_sklearn
 from cuml.datasets.utils import _create_rs_generator
 
 from cuml.internals.safe_imports import gpu_only_import
@@ -33,14 +34,6 @@
 
 def _generate_hypercube(samples, dimensions, rng):
     """Returns distinct binary samples of length dimensions"""
-    if not has_sklearn():
-        raise RuntimeError(
-            "Scikit-learn is needed to run \
-                           make_classification."
-        )
-
-    from sklearn.utils.random import sample_without_replacement
-
     if dimensions > 30:
         return np.hstack(
             [

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,13 +14,14 @@
 # limitations under the License.
 #
 
+from sklearn.linear_model import LassoLarsIC, lars_path
+
 from cuml.internals.safe_imports import gpu_only_import
 cp = gpu_only_import('cupy')
 from cuml.internals.safe_imports import cpu_only_import
 np = cpu_only_import('numpy')
 import time
 
-from cuml.internals.import_utils import has_sklearn
 from cuml.internals.input_utils import input_to_cupy_array
 from cuml.explainer.base import SHAPBase
 from cuml.explainer.common import get_cai_ptr
@@ -29,7 +30,6 @@ from cuml.linear_model import Lasso
 from cuml.linear_model import LinearRegression
 from functools import lru_cache
 from itertools import combinations
-from numbers import Number
 from random import randint
 
 from pylibraft.common.handle cimport handle_t
@@ -583,30 +583,18 @@ def _l1_regularization(X,
     X = cp.transpose(
         w_sqrt_aug * cp.transpose(cp.vstack((X, X - 1))))
 
-    # Use lasso if Scikit-learn is not present
-    if not has_sklearn():
-        if l1_reg == 'auto':
-            l1_reg = 0.2
-        elif not isinstance(l1_reg, Number):
-            raise ImportError("Scikit-learn is required for l1 "
-                              "regularization that is not Lasso.")
-        nonzero_inds = cp.nonzero(Lasso(alpha=l1_reg).fit(X, y).coef_)[0]
-
-    # Else match default behavior of mainline SHAP
-    elif l1_reg == 'auto':
-        from sklearn.linear_model import LassoLarsIC
+    # Match default behavior of mainline SHAP
+    if l1_reg == 'auto':
         nonzero_inds = np.nonzero(
             LassoLarsIC(criterion="aic").fit(cp.asnumpy(X),
                                              cp.asnumpy(y)).coef_)[0]
 
     elif isinstance(l1_reg, str):
         if l1_reg.startswith("num_features("):
-            from sklearn.linear_model import lars_path
             r = int(l1_reg[len("num_features("):-1])
             nonzero_inds = lars_path(cp.asnumpy(X),
                                      cp.asnumpy(y), max_iter=r)[1]
         elif l1_reg in ["aic", "bic"]:
-            from sklearn.linear_model import LassoLarsIC
             nonzero_inds = np.nonzero(
                 LassoLarsIC(criterion=l1_reg).fit(cp.asnumpy(X),
                                                   cp.asnumpy(y)).coef_)[0]

@@ -16,7 +16,6 @@
 
 from cuml.common import input_to_cuml_array
 from cuml.internals.array import CumlArray
-from cuml.internals.import_utils import has_sklearn
 from cuml.internals.input_utils import determine_array_type
 from cuml.legacy.fil.fil import TreeliteModel
 from cuml.ensemble import RandomForestRegressor as curfr
@@ -28,12 +27,9 @@ from cuml.internals.safe_imports import cpu_only_import
 np = cpu_only_import('numpy')
 import treelite
 
-if has_sklearn():
-    from sklearn.ensemble import RandomForestRegressor as sklrfr
-    from sklearn.ensemble import RandomForestClassifier as sklrfc
-else:
-    sklrfr = object
-    sklrfc = object
+from sklearn.ensemble import RandomForestRegressor as sklrfr
+from sklearn.ensemble import RandomForestClassifier as sklrfc
+
 
 cdef extern from "treelite/c_api.h":
     cdef struct TreelitePyBufferFrame:

@@ -707,8 +707,17 @@ def to_output(
                 out_index = cudf_to_pandas(self.index)
             else:
                 out_index = self.index
+
+            if output_mem_type.is_device_accessible:
+                # Do not convert NaNs to nulls in cuDF
+                df_kwargs = {"nan_as_null": False}
+            else:
+                df_kwargs = {}
+
             try:
-                result = output_mem_type.xdf.DataFrame(arr, index=out_index)
+                result = output_mem_type.xdf.DataFrame(
+                    arr, index=out_index, **df_kwargs
+                )
                 return result
             except TypeError:
                 raise ValueError("Unsupported dtype for DataFrame")

@@ -930,7 +930,18 @@ class UniversalBase(Base):
         estimator = cls()
         estimator.import_cpu_model()
         estimator._cpu_model = model
-        params, gpuaccel = cls._hyperparam_translator(**model.get_params())
+
+        # Remove params that are set to their default values. This mirrors the
+        # behaviour when creating the estimator in `as_sklearn`.
+        sklearn_signature = inspect.signature(model.__init__)
+        params = model.get_params()
+        # We use list() so we can modify `params` inside the loop
+        for name in list(params.keys()):
+            value = params[name]
+            if value == sklearn_signature.parameters[name].default:
+                params.pop(name)
+
+        params, gpuaccel = cls._hyperparam_translator(**params)
         params = {key: params[key] for key in cls._get_param_names() if key in params}
         estimator.set_params(**params)
         estimator.cpu_to_gpu()

@@ -17,7 +17,7 @@
 
 from packaging.version import Version
 
-MIN_SKLEARN_VERSION = Version('1.5')
+MIN_SKLEARN_VERSION = Version('1.3.2')
 
 
 try:

@@ -160,15 +160,6 @@ def has_scipy(raise_if_unavailable=False, min_version=None):
             raise ImportError("Scipy is not available.")
 
 
-def has_sklearn():
-    try:
-        import sklearn  # NOQA
-
-        return True
-    except ImportError:
-        return False
-
-
 def has_hdbscan(raise_if_unavailable=False):
     try:
         import hdbscan  # NOQA
-Original file line number
+Diff line change
@@ Expand Up / @@ -17,7 +17,7 @@ @@
     from packaging.version import Version
-    MIN_SKLEARN_VERSION = Version('1.5')
+    MIN_SKLEARN_VERSION = Version('1.3.2')
     try:
@@ Expand Down @@