Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
3d0760e
FIX fix cloning
dantegd Jan 14, 2025
ca66732
Merge branch 'branch-25.02' into fix-interop-fixes
dantegd Feb 7, 2025
497d181
FIX changes from PR review to not use internal sklearn APIs and mate …
dantegd Feb 14, 2025
dce1539
Merge cuML branch-25.04
dantegd Feb 14, 2025
0d797aa
Merge branch 'branch-25.04' into fix-interop-fixes
dantegd Feb 14, 2025
0ef4895
ENH Keep list of original hyperparams that user passed
dantegd Feb 14, 2025
a00af9d
Merge branch 'fix-interop-fixes' of github.com:dantegd/cuml into fix-…
dantegd Feb 14, 2025
4e70f4c
FIX remove unused imported function
dantegd Feb 14, 2025
de3e234
Check that get_params and cloning work
betatim Feb 14, 2025
f14a14b
Typo fix
betatim Feb 20, 2025
5a02fd0
ENH multiple improvements by using the cpu_model as the reference tru…
dantegd Feb 21, 2025
e0cd0d5
FIX style fixes
dantegd Feb 21, 2025
d206bb8
Merge cuML branch-25.04
dantegd Feb 21, 2025
e70c3fb
Merge branch 'branch-25.04' into fix-interop-fixes
dantegd Feb 21, 2025
3785c4e
DOC correct docstrings
dantegd Feb 21, 2025
fd17f09
Merge branch 'fix-interop-fixes' of github.com:dantegd/cuml into fix-…
dantegd Feb 21, 2025
e7a35a1
Move imports to the top
betatim Feb 21, 2025
742404e
Fix style
betatim Feb 21, 2025
fa41369
FIX multiple fixes from issues raised in PR review
dantegd Feb 24, 2025
4fb4982
FIX skip pytest for cuml.accel output type when accel is not active
dantegd Feb 24, 2025
b2c31b2
FIX small test fixes
dantegd Feb 24, 2025
5a5bf59
Merge cuML branch-25.04
dantegd Feb 24, 2025
8eeb5b7
FIX fix from a bad merge
dantegd Feb 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions python/cuml/cuml/cluster/hdbscan/hdbscan.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -771,7 +771,7 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
"""
Fit HDBSCAN model from features.
"""

self._all_finite = True
X_m, n_rows, n_cols, self.dtype = \
input_to_cuml_array(X, order='C',
check_dtype=[np.float32],
Expand Down Expand Up @@ -1163,7 +1163,7 @@ class HDBSCAN(UniversalBase, ClusterMixin, CMajorInputTagMixin):
def get_attr_names(self):
attr_names = ['labels_', 'probabilities_', 'cluster_persistence_',
'condensed_tree_', 'single_linkage_tree_',
'outlier_scores_']
'outlier_scores_', '_all_finite']
if self.gen_min_span_tree:
attr_names = attr_names + ['minimum_spanning_tree_']
if self.prediction_data:
Expand Down
6 changes: 6 additions & 0 deletions python/cuml/cuml/decomposition/pca.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,12 @@ class PCA(UniversalBase,
},
"n_components": {
"mle": "NotImplemented"
},
"tol": {
# tolerance controls tolerance of different solvers
# between sklearn and cuML, so at least the default
# value needs to be translated.
0.0: 1e-7
}
}

Expand Down
14 changes: 13 additions & 1 deletion python/cuml/cuml/decomposition/tsvd.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Copyright (c) 2019-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -245,6 +245,18 @@ class TruncatedSVD(UniversalBase,
"randomized": "full",
"arpack": "full",
},
"tol": {
# tolerance controls tolerance of different solvers
# between sklearn and cuML, so at least the default
# value needs to be translated.
0.0: 1e-7
},
"n_iter": {
# Translating the default n_iter from sklearn to the
# default of 15 of cuML to keep behavior consistent,
# and results performing closer.
5: 15
}
}

@device_interop_preparation
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cuml/experimental/accel/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def main(module, convert_to_sklearn, format, output, args):
(module,) = module
# run the module passing the remaining arguments
# as if it were run with python -m <module> <args>
sys.argv[:] = [module] + args # not thread safe?
sys.argv[:] = [module, *args.args] # not thread safe?
runpy.run_module(module, run_name="__main__")
elif len(args) >= 1:
# Remove ourself from argv and continue
Expand Down
14 changes: 10 additions & 4 deletions python/cuml/cuml/experimental/accel/estimator_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,15 +207,21 @@ def __init__(self, *args, **kwargs):
self._cpu_model_class = (
original_class_a # Store a reference to the original class
)
kwargs, self._gpuaccel = self._hyperparam_translator(**kwargs)
super().__init__(*args, **kwargs)

translated_kwargs, self._gpuaccel = self._hyperparam_translator(
**kwargs
)
super().__init__(*args, **translated_kwargs)

self._cpu_hyperparams = list(
inspect.signature(
self._cpu_model_class.__init__
).parameters.keys()
)

self.import_cpu_model()
self.build_cpu_model(**kwargs)

def __repr__(self):
"""
Return a formal string representation of the object.
Expand All @@ -226,7 +232,7 @@ def __repr__(self):
A string representation indicating that this is a wrapped
version of the original CPU-based estimator.
"""
return f"wrapped {self._cpu_model_class}"
return self._cpu_model.__repr__()

def __str__(self):
"""
Expand All @@ -238,7 +244,7 @@ def __str__(self):
A string representation indicating that this is a wrapped
version of the original CPU-based estimator.
"""
return f"ProxyEstimator of {self._cpu_model_class}"
return self._cpu_model.__str__()

def __getstate__(self):
"""
Expand Down
78 changes: 67 additions & 11 deletions python/cuml/cuml/internals/base.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,10 @@ class Base(TagsMixin,
"""
gpuaccel = True
# Copy it so we can modify it
translations = dict(cls.__bases__[0]._hyperparam_interop_translator)
# we need to explicitly use UniversalBase because not all estimator
# have it as the first parent in their MRO/inheritance, like
# linear_regression
translations = dict(UniversalBase._hyperparam_interop_translator)
# Allow the derived class to overwrite the base class
translations.update(cls._hyperparam_interop_translator)
for parameter_name, value in kwargs.items():
Expand Down Expand Up @@ -640,17 +643,20 @@ class UniversalBase(Base):
inspect.signature(self._cpu_model_class.__init__).parameters.keys()
)

def build_cpu_model(self):
def build_cpu_model(self, **kwargs):
if hasattr(self, '_cpu_model'):
return
filtered_kwargs = {}
for keyword, arg in self._full_kwargs.items():
if keyword in self._cpu_hyperparams:
filtered_kwargs[keyword] = arg
else:
logger.info("Unused keyword parameter: {} "
"during CPU estimator "
"initialization".format(keyword))
if kwargs:
filtered_kwargs = kwargs
else:
filtered_kwargs = {}
for keyword, arg in self._full_kwargs.items():
if keyword in self._cpu_hyperparams:
filtered_kwargs[keyword] = arg
else:
logger.info("Unused keyword parameter: {} "
"during CPU estimator "
"initialization".format(keyword))

# initialize model
self._cpu_model = self._cpu_model_class(**filtered_kwargs)
Expand Down Expand Up @@ -848,12 +854,18 @@ class UniversalBase(Base):
def __getattr__(self, attr):
try:
return super().__getattr__(attr)
except AttributeError:
except AttributeError as ex:
# When using cuml.experimental.accel or setting the
# self._experimental_dispatching flag to True, we look for methods
# that are not in the cuML estimator in the host estimator
gs = GlobalSettings()
if gs.accelerator_active or self._experimental_dispatching:
# we don't want to dispatch the special sklearn cloning hook
# (__sklearn_clone__) so that cloning treats this class as a
# regular estimator
if attr == "__sklearn_clone__":
raise ex

self.import_cpu_model()
if hasattr(self._cpu_model_class, attr):
# we turn off and cache the dispatching variables off so that
Expand Down Expand Up @@ -940,6 +952,9 @@ class UniversalBase(Base):
estimator = cls()
estimator.import_cpu_model()
estimator._cpu_model = model
params, gpuaccel = cls._hyperparam_translator(**model.get_params())
params = {key: params[key] for key in cls._get_param_names() if key in params}
estimator.set_params(**params)
estimator.cpu_to_gpu()

# we need to set an output type here since
Expand All @@ -950,3 +965,44 @@ class UniversalBase(Base):
estimator.output_mem_type = MemoryType.host

return estimator

def get_params(self, deep=True):
    """
    Get parameters for this estimator.

    Parameters
    ----------
    deep : bool, default=True
        If True, will return the parameters for this estimator and
        contained subobjects that are estimators.

    Returns
    -------
    params : dict
        Parameter names mapped to their values.
    """
    # When dispatching to the host implementation is enabled, the wrapped
    # CPU estimator is the source of truth for hyperparameters; otherwise
    # behave like a regular cuML estimator.
    dispatching = (
        GlobalSettings().accelerator_active
        or self._experimental_dispatching
    )
    if dispatching:
        return self._cpu_model.get_params(deep=deep)
    return super().get_params(deep=deep)

def set_params(self, **params):
    """
    Set parameters for this estimator.

    Parameters
    ----------
    **params : dict
        Estimator parameters.

    Returns
    -------
    self : estimator instance
        The estimator instance.
    """
    if GlobalSettings().accelerator_active or self._experimental_dispatching:
        # Keep the wrapped CPU estimator in sync with the raw values the
        # user passed, then translate them (e.g. solver names, default
        # tolerances) and drop anything the cuML estimator does not
        # accept before forwarding to the cuML side.
        self._cpu_model.set_params(**params)
        params, gpuaccel = self._hyperparam_translator(**params)
        params = {
            key: params[key]
            for key in self._get_param_names()
            if key in params
        }
    super().set_params(**params)
    return self
8 changes: 7 additions & 1 deletion python/cuml/cuml/linear_model/elastic_net.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Copyright (c) 2019-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -157,6 +157,12 @@ class ElasticNet(UniversalBase,
"warm_start": {
True: "NotImplemented",
},
"tol": {
# tolerance controls tolerance of different solvers
# between sklearn and cuML, so at least the default
# value needs to be translated.
0.0001: 0.001
}
}

@device_interop_preparation
Expand Down
8 changes: 6 additions & 2 deletions python/cuml/cuml/linear_model/logistic_regression.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,9 @@ class LogisticRegression(UniversalBase,
return l1_strength, l2_strength

def _build_class_weights(self, class_weight):
if class_weight == 'balanced':
if class_weight is None:
self.class_weight = None
elif class_weight == 'balanced':
self.class_weight = 'balanced'
else:
classes = list(class_weight.keys())
Expand Down Expand Up @@ -522,7 +524,9 @@ class LogisticRegression(UniversalBase,
class_weight = params.pop('class_weight')
self._build_class_weights(class_weight)

# Update solver
# if the user is setting the solver, then
# it cannot be propagated to the solver model itself.
_ = params.pop("solver", None)
self.solver_model.set_params(**params)
return self

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@

import pytest
import numpy as np
import cupy as cp
from sklearn import clone, cluster
import cuml
from cuml.internals.global_settings import GlobalSettings
from cuml.internals.safe_imports import gpu_only_import
from sklearn.datasets import make_classification, make_regression, make_blobs
from sklearn.linear_model import (
LinearRegression,
Expand All @@ -40,6 +43,8 @@
)
from scipy.sparse import random as sparse_random

cp = gpu_only_import("cupy")


def test_kmeans():
X, y_true = make_blobs(n_samples=100, centers=3, random_state=42)
Expand Down Expand Up @@ -172,6 +177,49 @@ def test_proxy_facade():
assert original_value == proxy_value


def test_proxy_clone():
    """Cloning a proxy estimator must preserve all parameters, including
    the ones that get translated for the cuML class."""
    original = PCA(n_components=42, svd_solver="arpack")
    duplicate = clone(original)

    assert original.get_params() == duplicate.get_params()


def test_proxy_params():
    """get_params() must reflect exactly what was passed to the
    constructor, with defaults for everything else."""
    estimator = PCA(
        n_components=5,
        copy=False,
        # Explicitly pass a value that equals the default
        whiten=False,
    )

    observed = estimator.get_params()
    assert observed["n_components"] == 5
    assert observed["copy"] is False
    assert observed["whiten"] is False
    # A parameter we never touched should keep its default value
    assert observed["tol"] == 0.0

    # get_params must not report any keys beyond the documented
    # constructor arguments
    expected_keys = {
        "n_components",
        "copy",
        "whiten",
        "tol",
        "svd_solver",
        "n_oversamples",
        "random_state",
        "iterated_power",
        "power_iteration_normalizer",
    }
    assert set(observed) == expected_keys


def test_defaults_args_only_methods():
# Check that estimator methods that take no arguments work
# These are slightly weird because basically everything else takes
Expand All @@ -184,7 +232,12 @@ def test_defaults_args_only_methods():
nn.kneighbors()


@pytest.mark.skipif(
not GlobalSettings().accelerator_active,
reason="Test designed to test output type of cuml.accel",
)
def test_kernel_ridge():

rng = np.random.RandomState(42)

X = 5 * rng.rand(10000, 1)
Expand Down
Loading