Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ci/test_python_common.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2025, NVIDIA CORPORATION.

set -euo pipefail

Expand All @@ -13,7 +13,7 @@ rapids-logger "Generate Python testing dependencies"
rapids-dependency-file-generator \
--output conda \
--file-key test_python \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \
--prepend-channel "${CPP_CHANNEL}" \
--prepend-channel "${PYTHON_CHANNEL}" | tee env.yaml

Expand Down
7 changes: 6 additions & 1 deletion ci/test_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,15 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")"
RAPIDS_PY_WHEEL_NAME="cuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist
RAPIDS_PY_WHEEL_NAME="libcuml_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist

# generate constraints, the constraints will limit the version of the
# dependencies that can be installed later on when installing the wheel
rapids-generate-pip-constraints test_python ./constraints.txt

# echo to expand wildcard before adding `[extra]` requires for pip
rapids-pip-retry install \
./dist/libcuml*.whl \
"$(echo ./dist/cuml*.whl)[test]"
"$(echo ./dist/cuml*.whl)[test]" \
--constraint ./constraints.txt

RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
mkdir -p "${RAPIDS_TESTS_DIR}"
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-118_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ dependencies:
- ninja
- nltk
- numba>=0.59.1,<0.61.0a0
- numpy>=1.23,<3.0a0
- numpy>=1.24,<3.0a0
- numpydoc
- nvcc_linux-aarch64=11.8
- packaging
Expand All @@ -67,7 +67,7 @@ dependencies:
- recommonmark
- rmm==25.6.*,>=0.0.0a0
- scikit-build-core>=0.10.0
- scikit-learn==1.5.*
- scikit-learn>=1.3.2
- scipy>=1.8.0
- seaborn
- sphinx-copybutton
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ dependencies:
- ninja
- nltk
- numba>=0.59.1,<0.61.0a0
- numpy>=1.23,<3.0a0
- numpy>=1.24,<3.0a0
- numpydoc
- nvcc_linux-64=11.8
- packaging
Expand All @@ -67,7 +67,7 @@ dependencies:
- recommonmark
- rmm==25.6.*,>=0.0.0a0
- scikit-build-core>=0.10.0
- scikit-learn==1.5.*
- scikit-learn>=1.3.2
- scipy>=1.8.0
- seaborn
- sphinx-copybutton
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-128_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ dependencies:
- ninja
- nltk
- numba>=0.59.1,<0.61.0a0
- numpy>=1.23,<3.0a0
- numpy>=1.24,<3.0a0
- numpydoc
- packaging
- pydata-sphinx-theme!=0.14.2
Expand All @@ -63,7 +63,7 @@ dependencies:
- recommonmark
- rmm==25.6.*,>=0.0.0a0
- scikit-build-core>=0.10.0
- scikit-learn==1.5.*
- scikit-learn>=1.3.2
- scipy>=1.8.0
- seaborn
- sphinx-copybutton
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-128_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ dependencies:
- ninja
- nltk
- numba>=0.59.1,<0.61.0a0
- numpy>=1.23,<3.0a0
- numpy>=1.24,<3.0a0
- numpydoc
- packaging
- pydata-sphinx-theme!=0.14.2
Expand All @@ -63,7 +63,7 @@ dependencies:
- recommonmark
- rmm==25.6.*,>=0.0.0a0
- scikit-build-core>=0.10.0
- scikit-learn==1.5.*
- scikit-learn>=1.3.2
- scipy>=1.8.0
- seaborn
- sphinx-copybutton
Expand Down
15 changes: 13 additions & 2 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -268,13 +268,14 @@ dependencies:
- dask-cuda==25.6.*,>=0.0.0a0
- joblib>=0.11
- numba>=0.59.1,<0.61.0a0
- numpy>=1.23,<3.0a0
- numpy>=1.24,<3.0a0
# TODO: Is scipy really a hard dependency, or should
# we make it optional (i.e. an extra for pip
# installation/run_constrained for conda)?
- scipy>=1.8.0
- packaging
- rapids-dask-dependency==25.6.*,>=0.0.0a0
- &scikit_learn scikit-learn>=1.3.2
- *treelite
- output_types: requirements
packages:
Expand Down Expand Up @@ -439,7 +440,7 @@ dependencies:
# https://github.com/pydata/pydata-sphinx-theme/issues/1539
- pydata-sphinx-theme!=0.14.2
- recommonmark
- &scikit_learn scikit-learn==1.5.*
- *scikit_learn
- sphinx<8.2.0
- sphinx-copybutton
- sphinx-markdown-tables
Expand Down Expand Up @@ -476,6 +477,16 @@ dependencies:
packages:
- *cmake_ver
test_python:
specific:
- output_types: [conda, requirements]
matrices:
- matrix: {dependencies: "oldest"}
packages:
- scikit-learn==1.3.2
- scipy==1.8.0
- numpy==1.24
- matrix:
packages:
common:
- output_types: [conda, requirements, pyproject]
packages:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,9 @@
import functools
import timeit
import numbers
from cuml.internals.import_utils import has_sklearn

if has_sklearn():
from sklearn.base import clone
from sklearn.utils import Bunch
from sklearn.base import clone
from sklearn.utils import Bunch
from contextlib import contextmanager
from collections import defaultdict
import warnings
Expand Down Expand Up @@ -561,9 +559,6 @@ def __init__(self,
n_jobs=None,
transformer_weights=None,
verbose=False):
if not has_sklearn():
raise ImportError("Scikit-learn is needed to use the "
"Column Transformer")
if not transformers:
warnings.warn('Transformers are required')
self.transformers = transformers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,14 @@
from cuml.internals.safe_imports import cpu_only_import
cpu_np = cpu_only_import('numpy')
np = gpu_only_import('cupy')
resample = cpu_only_import_from('sklearn.utils._indexing', 'resample')

# In scikit-learn 1.4.x the `resample` function was available in
# `sklearn.utils`, but got moved to `sklearn.utils._indexing` in 1.5.
try:
resample = cpu_only_import_from('sklearn.utils._indexing', 'resample')
except ModuleNotFoundError:
resample = cpu_only_import_from('sklearn.utils', 'resample')

sparse = gpu_only_import_from('cupyx.scipy', 'sparse')
stats = cpu_only_import_from('scipy', 'stats')

Expand Down
11 changes: 11 additions & 0 deletions python/cuml/cuml/cluster/kmeans.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,17 @@ class KMeans(UniversalBase,
else None),
check_dtype=[np.float32, np.float64]
)
"""
@classmethod
def _hyperparam_translator(cls, **kwargs):
kwargs, gpuaccel = super()._hyperparam_translator(**kwargs)

if "n_init" in kwargs:
if Version(sklearn.__version__) == Version('1.3.2'):
if kwargs['n_init'] == 'warn':
kwargs['n_init'] = 10

return kwargs, gpuaccel"""

@generate_docstring()
@enable_device_interop
Expand Down
13 changes: 3 additions & 10 deletions python/cuml/cuml/datasets/classification.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -13,8 +13,9 @@
# limitations under the License.
#

from sklearn.utils.random import sample_without_replacement

import cuml.internals
from cuml.internals.import_utils import has_sklearn
from cuml.datasets.utils import _create_rs_generator

from cuml.internals.safe_imports import gpu_only_import
Expand All @@ -33,14 +34,6 @@

def _generate_hypercube(samples, dimensions, rng):
"""Returns distinct binary samples of length dimensions"""
if not has_sklearn():
raise RuntimeError(
"Scikit-learn is needed to run \
make_classification."
)

from sklearn.utils.random import sample_without_replacement

if dimensions > 30:
return np.hstack(
[
Expand Down
22 changes: 5 additions & 17 deletions python/cuml/cuml/explainer/kernel_shap.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -14,13 +14,14 @@
# limitations under the License.
#

from sklearn.linear_model import LassoLarsIC, lars_path

from cuml.internals.safe_imports import gpu_only_import
cp = gpu_only_import('cupy')
from cuml.internals.safe_imports import cpu_only_import
np = cpu_only_import('numpy')
import time

from cuml.internals.import_utils import has_sklearn
from cuml.internals.input_utils import input_to_cupy_array
from cuml.explainer.base import SHAPBase
from cuml.explainer.common import get_cai_ptr
Expand All @@ -29,7 +30,6 @@ from cuml.linear_model import Lasso
from cuml.linear_model import LinearRegression
from functools import lru_cache
from itertools import combinations
from numbers import Number
from random import randint

from pylibraft.common.handle cimport handle_t
Expand Down Expand Up @@ -583,30 +583,18 @@ def _l1_regularization(X,
X = cp.transpose(
w_sqrt_aug * cp.transpose(cp.vstack((X, X - 1))))

# Use lasso if Scikit-learn is not present
if not has_sklearn():
if l1_reg == 'auto':
l1_reg = 0.2
elif not isinstance(l1_reg, Number):
raise ImportError("Scikit-learn is required for l1 "
"regularization that is not Lasso.")
nonzero_inds = cp.nonzero(Lasso(alpha=l1_reg).fit(X, y).coef_)[0]

# Else match default behavior of mainline SHAP
elif l1_reg == 'auto':
from sklearn.linear_model import LassoLarsIC
# Match default behavior of mainline SHAP
if l1_reg == 'auto':
nonzero_inds = np.nonzero(
LassoLarsIC(criterion="aic").fit(cp.asnumpy(X),
cp.asnumpy(y)).coef_)[0]

elif isinstance(l1_reg, str):
if l1_reg.startswith("num_features("):
from sklearn.linear_model import lars_path
r = int(l1_reg[len("num_features("):-1])
nonzero_inds = lars_path(cp.asnumpy(X),
cp.asnumpy(y), max_iter=r)[1]
elif l1_reg in ["aic", "bic"]:
from sklearn.linear_model import LassoLarsIC
nonzero_inds = np.nonzero(
LassoLarsIC(criterion=l1_reg).fit(cp.asnumpy(X),
cp.asnumpy(y)).coef_)[0]
Expand Down
10 changes: 3 additions & 7 deletions python/cuml/cuml/explainer/tree_shap.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

from cuml.common import input_to_cuml_array
from cuml.internals.array import CumlArray
from cuml.internals.import_utils import has_sklearn
from cuml.internals.input_utils import determine_array_type
from cuml.legacy.fil.fil import TreeliteModel
from cuml.ensemble import RandomForestRegressor as curfr
Expand All @@ -28,12 +27,9 @@ from cuml.internals.safe_imports import cpu_only_import
np = cpu_only_import('numpy')
import treelite

if has_sklearn():
from sklearn.ensemble import RandomForestRegressor as sklrfr
from sklearn.ensemble import RandomForestClassifier as sklrfc
else:
sklrfr = object
sklrfc = object
from sklearn.ensemble import RandomForestRegressor as sklrfr
from sklearn.ensemble import RandomForestClassifier as sklrfc


cdef extern from "treelite/c_api.h":
cdef struct TreelitePyBufferFrame:
Expand Down
11 changes: 10 additions & 1 deletion python/cuml/cuml/internals/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -707,8 +707,17 @@ def to_output(
out_index = cudf_to_pandas(self.index)
else:
out_index = self.index

if output_mem_type.is_device_accessible:
# Do not convert NaNs to nulls in cuDF
df_kwargs = {"nan_as_null": False}
else:
df_kwargs = {}

try:
result = output_mem_type.xdf.DataFrame(arr, index=out_index)
result = output_mem_type.xdf.DataFrame(
arr, index=out_index, **df_kwargs
)
return result
except TypeError:
raise ValueError("Unsupported dtype for DataFrame")
Expand Down
13 changes: 12 additions & 1 deletion python/cuml/cuml/internals/base.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -930,7 +930,18 @@ class UniversalBase(Base):
estimator = cls()
estimator.import_cpu_model()
estimator._cpu_model = model
params, gpuaccel = cls._hyperparam_translator(**model.get_params())

# Remove params that are set to their default values. This mirrors the
# behaviour when creating the estimator in `as_sklearn`.
sklearn_signature = inspect.signature(model.__init__)
params = model.get_params()
# We use list() so we can modify `params` inside the loop
for name in list(params.keys()):
value = params[name]
if value == sklearn_signature.parameters[name].default:
params.pop(name)

params, gpuaccel = cls._hyperparam_translator(**params)
params = {key: params[key] for key in cls._get_param_names() if key in params}
estimator.set_params(**params)
estimator.cpu_to_gpu()
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cuml/internals/device_support.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from packaging.version import Version

MIN_SKLEARN_VERSION = Version('1.5')
MIN_SKLEARN_VERSION = Version('1.3.2')


try:
Expand Down
9 changes: 0 additions & 9 deletions python/cuml/cuml/internals/import_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,15 +160,6 @@ def has_scipy(raise_if_unavailable=False, min_version=None):
raise ImportError("Scipy is not available.")


def has_sklearn():
try:
import sklearn # NOQA

return True
except ImportError:
return False


def has_hdbscan(raise_if_unavailable=False):
try:
import hdbscan # NOQA
Expand Down
Loading