Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
3d0760e
FIX fix cloning
dantegd Jan 14, 2025
ca66732
Merge branch 'branch-25.02' into fix-interop-fixes
dantegd Feb 7, 2025
497d181
FIX changes from PR review to not use internal sklearn APIs and mate …
dantegd Feb 14, 2025
dce1539
Merge cuML branch-25.04
dantegd Feb 14, 2025
0d797aa
Merge branch 'branch-25.04' into fix-interop-fixes
dantegd Feb 14, 2025
0ef4895
ENH Keep list of original hyperparams that user passed
dantegd Feb 14, 2025
a00af9d
Merge branch 'fix-interop-fixes' of github.com:dantegd/cuml into fix-…
dantegd Feb 14, 2025
4e70f4c
FIX remove unused imported function
dantegd Feb 14, 2025
de3e234
Check that get_params and cloning work
betatim Feb 14, 2025
f14a14b
Typo fix
betatim Feb 20, 2025
5a02fd0
ENH multiple improvements by using the cpu_model as the reference tru…
dantegd Feb 21, 2025
e0cd0d5
FIX style fixes
dantegd Feb 21, 2025
d206bb8
Merge cuML branch-25.04
dantegd Feb 21, 2025
e70c3fb
Merge branch 'branch-25.04' into fix-interop-fixes
dantegd Feb 21, 2025
3785c4e
DOC correct docstrings
dantegd Feb 21, 2025
fd17f09
Merge branch 'fix-interop-fixes' of github.com:dantegd/cuml into fix-…
dantegd Feb 21, 2025
e7a35a1
Move imports to the top
betatim Feb 21, 2025
742404e
Fix style
betatim Feb 21, 2025
fa41369
FIX multiple fixes from issues raised in PR review
dantegd Feb 24, 2025
4fb4982
FIX skip pytest for cuml.accel output type when accel is not active
dantegd Feb 24, 2025
b2c31b2
FIX small test fixes
dantegd Feb 24, 2025
5a5bf59
Merge cuML branch-25.04
dantegd Feb 24, 2025
8eeb5b7
FIX fix from a bad merge
dantegd Feb 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions python/cuml/cuml/cluster/hdbscan/hdbscan.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -771,7 +771,7 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
"""
Fit HDBSCAN model from features.
"""

self._all_finite = True
X_m, n_rows, n_cols, self.dtype = \
input_to_cuml_array(X, order='C',
check_dtype=[np.float32],
Expand Down Expand Up @@ -1163,7 +1163,7 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
def get_attr_names(self):
attr_names = ['labels_', 'probabilities_', 'cluster_persistence_',
'condensed_tree_', 'single_linkage_tree_',
'outlier_scores_']
'outlier_scores_', '_all_finite']
if self.gen_min_span_tree:
attr_names = attr_names + ['minimum_spanning_tree_']
if self.prediction_data:
Expand Down
6 changes: 6 additions & 0 deletions python/cuml/cuml/decomposition/pca.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,12 @@ class PCA(UniversalBase,
},
"n_components": {
"mle": "NotImplemented"
},
"tol": {
# tolerance controls tolerance of different solvers
# between sklearn and cuML, so at least the default
# value needs to be translated.
0.0: 1e-7
}
}

Expand Down
14 changes: 13 additions & 1 deletion python/cuml/cuml/decomposition/tsvd.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Copyright (c) 2019-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -245,6 +245,18 @@ class TruncatedSVD(UniversalBase,
"randomized": "full",
"arpack": "full",
},
"tol": {
# tolerance controls tolerance of different solvers
# between sklearn and cuML, so at least the default
# value needs to be translated.
0.0: 1e-7
},
"n_iter": {
# Translating the default n_iter from sklearn to the
# default of 15 of cuML to keep behavior consistent,
# and results performing closer.
5: 15
}
}

@device_interop_preparation
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cuml/experimental/accel/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def main(module, convert_to_sklearn, format, output, args):
(module,) = module
# run the module passing the remaining arguments
# as if it were run with python -m <module> <args>
sys.argv[:] = [module] + args # not thread safe?
sys.argv[:] = [module, *args.args] # not thread safe?
runpy.run_module(module, run_name="__main__")
elif len(args) >= 1:
# Remove ourself from argv and continue
Expand Down
14 changes: 10 additions & 4 deletions python/cuml/cuml/experimental/accel/estimator_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,15 +207,21 @@ def __init__(self, *args, **kwargs):
self._cpu_model_class = (
original_class_a # Store a reference to the original class
)
kwargs, self._gpuaccel = self._hyperparam_translator(**kwargs)
super().__init__(*args, **kwargs)

translated_kwargs, self._gpuaccel = self._hyperparam_translator(
**kwargs
)
super().__init__(*args, **translated_kwargs)

self._cpu_hyperparams = list(
inspect.signature(
self._cpu_model_class.__init__
).parameters.keys()
)

self.import_cpu_model()
self.build_cpu_model(**kwargs)

def __repr__(self):
"""
Return a formal string representation of the object.
Expand All @@ -226,7 +232,7 @@ def __repr__(self):
A string representation indicating that this is a wrapped
version of the original CPU-based estimator.
"""
return f"wrapped {self._cpu_model_class}"
return self._cpu_model.__repr__()

def __str__(self):
"""
Expand All @@ -238,7 +244,7 @@ def __str__(self):
A string representation indicating that this is a wrapped
version of the original CPU-based estimator.
"""
return f"ProxyEstimator of {self._cpu_model_class}"
return self._cpu_model.__str__()

def __getstate__(self):
"""
Expand Down
78 changes: 67 additions & 11 deletions python/cuml/cuml/internals/base.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,10 @@ class Base(TagsMixin,
"""
gpuaccel = True
# Copy it so we can modify it
translations = dict(cls.__bases__[0]._hyperparam_interop_translator)
# we need to explicitly use UniversalBase because not all estimator
# have it as the first parent in their MRO/inheritance, like
# linear_regression
translations = dict(UniversalBase._hyperparam_interop_translator)
# Allow the derived class to overwrite the base class
translations.update(cls._hyperparam_interop_translator)
for parameter_name, value in kwargs.items():
Expand Down Expand Up @@ -640,17 +643,20 @@ class UniversalBase(Base):
inspect.signature(self._cpu_model_class.__init__).parameters.keys()
)

def build_cpu_model(self):
def build_cpu_model(self, **kwargs):
if hasattr(self, '_cpu_model'):
return
filtered_kwargs = {}
for keyword, arg in self._full_kwargs.items():
if keyword in self._cpu_hyperparams:
filtered_kwargs[keyword] = arg
else:
logger.info("Unused keyword parameter: {} "
"during CPU estimator "
"initialization".format(keyword))
if kwargs:
filtered_kwargs = kwargs
else:
filtered_kwargs = {}
for keyword, arg in self._full_kwargs.items():
if keyword in self._cpu_hyperparams:
filtered_kwargs[keyword] = arg
else:
logger.info("Unused keyword parameter: {} "
"during CPU estimator "
"initialization".format(keyword))

# initialize model
self._cpu_model = self._cpu_model_class(**filtered_kwargs)
Expand Down Expand Up @@ -848,12 +854,18 @@ class UniversalBase(Base):
def __getattr__(self, attr):
try:
return super().__getattr__(attr)
except AttributeError:
except AttributeError as ex:
# When using cuml.experimental.accel or setting the
# self._experimental_dispatching flag to True, we look for methods
# that are not in the cuML estimator in the host estimator
gs = GlobalSettings()
if gs.accelerator_active or self._experimental_dispatching:
# we don't want to dispatch the special sklearn cloning hook
# (__sklearn_clone__) so that cloning treats this class as a
# regular estimator
if attr == "__sklearn_clone__":
raise ex

self.import_cpu_model()
if hasattr(self._cpu_model_class, attr):
# we turn off and cache the dispatching variables off so that
Expand Down Expand Up @@ -940,6 +952,9 @@ class UniversalBase(Base):
estimator = cls()
estimator.import_cpu_model()
estimator._cpu_model = model
params, gpuaccel = cls._hyperparam_translator(**model.get_params())
params = {key: params[key] for key in cls._get_param_names() if key in params}
estimator.set_params(**params)
estimator.cpu_to_gpu()

# we need to set an output type here since
Expand All @@ -950,3 +965,44 @@ class UniversalBase(Base):
estimator.output_mem_type = MemoryType.host

return estimator

def get_params(self, deep=True):
    """
    Get parameters for this estimator.

    Parameters
    ----------
    deep : bool, default=True
        If True, will return the parameters for this estimator and
        contained subobjects that are estimators.

    Returns
    -------
    params : dict
        Parameter names mapped to their values.
    """
    # When dispatching to the host implementation is enabled, the wrapped
    # CPU estimator is the source of truth for hyperparameters; otherwise
    # behave like a regular cuML estimator.
    dispatching = (
        GlobalSettings().accelerator_active
        or self._experimental_dispatching
    )
    if dispatching:
        return self._cpu_model.get_params(deep=deep)
    return super().get_params(deep=deep)

def set_params(self, **params):
    """
    Set parameters for this estimator.

    Parameters
    ----------
    **params : dict
        Estimator parameters.

    Returns
    -------
    self : estimator instance
        The estimator instance.
    """
    if GlobalSettings().accelerator_active or self._experimental_dispatching:
        # Keep the wrapped CPU estimator in sync with the raw values the
        # user passed, then translate them (e.g. solver names, default
        # tolerances) and drop anything the cuML estimator does not
        # accept before forwarding to the cuML side.
        self._cpu_model.set_params(**params)
        params, gpuaccel = self._hyperparam_translator(**params)
        params = {
            key: params[key]
            for key in self._get_param_names()
            if key in params
        }
    super().set_params(**params)
    return self
8 changes: 7 additions & 1 deletion python/cuml/cuml/linear_model/elastic_net.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Copyright (c) 2019-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -157,6 +157,12 @@ class ElasticNet(UniversalBase,
"warm_start": {
True: "NotImplemented",
},
"tol": {
# tolerance controls tolerance of different solvers
# between sklearn and cuML, so at least the default
# value needs to be translated.
0.0001: 0.001
}
}

@device_interop_preparation
Expand Down
8 changes: 6 additions & 2 deletions python/cuml/cuml/linear_model/logistic_regression.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,9 @@ class LogisticRegression(UniversalBase,
return l1_strength, l2_strength

def _build_class_weights(self, class_weight):
if class_weight == 'balanced':
if class_weight is None:
self.class_weight = None
elif class_weight == 'balanced':
self.class_weight = 'balanced'
else:
classes = list(class_weight.keys())
Expand Down Expand Up @@ -522,7 +524,9 @@ class LogisticRegression(UniversalBase,
class_weight = params.pop('class_weight')
self._build_class_weights(class_weight)

# Update solver
# if the user is setting the solver, then
# it cannot be propagated to the solver model itself.
_ = params.pop("solver", None)
self.solver_model.set_params(**params)
return self

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@

import pytest
import numpy as np
import cupy as cp
from sklearn import clone, cluster
import cuml
from cuml.internals.global_settings import GlobalSettings
from cuml.internals.safe_imports import gpu_only_import
from sklearn.datasets import make_classification, make_regression, make_blobs
from sklearn.linear_model import (
LinearRegression,
Expand All @@ -40,6 +43,8 @@
)
from scipy.sparse import random as sparse_random

cp = gpu_only_import("cupy")


def test_kmeans():
X, y_true = make_blobs(n_samples=100, centers=3, random_state=42)
Expand Down Expand Up @@ -172,6 +177,49 @@ def test_proxy_facade():
assert original_value == proxy_value


def test_proxy_clone():
    """Cloning a proxy estimator must preserve all parameters, including
    the ones that get translated for the cuML class."""
    original = PCA(n_components=42, svd_solver="arpack")
    duplicate = clone(original)

    assert original.get_params() == duplicate.get_params()


def test_proxy_params():
    """get_params() must reflect exactly what was passed to the
    constructor, with defaults for everything else."""
    estimator = PCA(
        n_components=5,
        copy=False,
        # Explicitly pass a value that equals the default
        whiten=False,
    )

    observed = estimator.get_params()
    assert observed["n_components"] == 5
    assert observed["copy"] is False
    assert observed["whiten"] is False
    # A parameter we never touched should keep its default value
    assert observed["tol"] == 0.0

    # get_params must not report any keys beyond the documented
    # constructor arguments
    expected_keys = {
        "n_components",
        "copy",
        "whiten",
        "tol",
        "svd_solver",
        "n_oversamples",
        "random_state",
        "iterated_power",
        "power_iteration_normalizer",
    }
    assert set(observed) == expected_keys


def test_defaults_args_only_methods():
# Check that estimator methods that take no arguments work
# These are slightly weird because basically everything else takes
Expand All @@ -184,7 +232,12 @@ def test_defaults_args_only_methods():
nn.kneighbors()


@pytest.mark.skipif(
not GlobalSettings().accelerator_active,
reason="Test designed to test output type of cuml.accel",
)
def test_kernel_ridge():

rng = np.random.RandomState(42)

X = 5 * rng.rand(10000, 1)
Expand Down
Loading