diff --git a/python/cuml/cuml/linear_model/elastic_net.py b/python/cuml/cuml/linear_model/elastic_net.py index d28efd1deb..d0e388b678 100644 --- a/python/cuml/cuml/linear_model/elastic_net.py +++ b/python/cuml/cuml/linear_model/elastic_net.py @@ -2,10 +2,6 @@ # SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # - -from inspect import signature - -from cuml.common import input_to_cuml_array from cuml.common.array_descriptor import CumlArrayDescriptor from cuml.common.doc_utils import generate_docstring from cuml.internals.array import CumlArray @@ -16,98 +12,62 @@ to_cpu, to_gpu, ) -from cuml.internals.logger import warn from cuml.internals.mixins import FMajorInputTagMixin, RegressorMixin from cuml.linear_model.base import LinearPredictMixin -from cuml.solvers import CD, QN +from cuml.solvers import QN +from cuml.solvers.cd import fit_coordinate_descent class ElasticNet( Base, InteropMixin, LinearPredictMixin, RegressorMixin, FMajorInputTagMixin ): - """ - ElasticNet extends LinearRegression with combined L1 and L2 regularizations - on the coefficients when predicting response y with a linear combination of - the predictors in X. It can reduce the variance of the predictors, force - some coefficients to be small, and improves the conditioning of the - problem. - - cuML's ElasticNet an array-like object or cuDF DataFrame, uses coordinate - descent to fit a linear model. - - Examples - -------- - - .. code-block:: python - - >>> import cupy as cp - >>> import cudf - >>> from cuml.linear_model import ElasticNet - >>> enet = ElasticNet(alpha = 0.1, l1_ratio=0.5, solver='qn') - >>> X = cudf.DataFrame() - >>> X['col1'] = cp.array([0, 1, 2], dtype = cp.float32) - >>> X['col2'] = cp.array([0, 1, 2], dtype = cp.float32) - >>> y = cudf.Series(cp.array([0.0, 1.0, 2.0], dtype = cp.float32) ) - >>> result_enet = enet.fit(X, y) - >>> print(result_enet.coef_) - 0 0.445... - 1 0.445... 
- dtype: float32 - >>> print(result_enet.intercept_) - 0.108433... - >>> X_new = cudf.DataFrame() - >>> X_new['col1'] = cp.array([3,2], dtype = cp.float32) - >>> X_new['col2'] = cp.array([5,5], dtype = cp.float32) - >>> preds = result_enet.predict(X_new) - >>> print(preds) - 0 3.674... - 1 3.228... - dtype: float32 + Linear regression with combined L1 and L2 priors as regularizer. Parameters ---------- - alpha : float (default = 1.0) + alpha : float, default=1.0 Constant that multiplies the L1 term. alpha = 0 is equivalent to an ordinary least square, solved by the LinearRegression object. For numerical reasons, using alpha = 0 with the Lasso object is not advised. Given this, you should use the LinearRegression object. - l1_ratio : float (default = 0.5) + l1_ratio : float, default=0.5 The ElasticNet mixing parameter, with 0 <= l1_ratio <= 1. For l1_ratio = 0 the penalty is an L2 penalty. For l1_ratio = 1 it is an L1 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2. - fit_intercept : boolean (default = True) + fit_intercept : boolean, default=True If True, Lasso tries to correct for the global mean of y. If False, the model expects that you have centered the data. - normalize : boolean (default = False) - If True, the predictors in X will be normalized by dividing by the - column-wise standard deviation. - If False, no scaling will be done. - Note: this is in contrast to sklearn's deprecated `normalize` flag, - which divides by the column-wise L2 norm; but this is the same as if - using sklearn's StandardScaler. - max_iter : int (default = 1000) + max_iter : int, default=1000 The maximum number of iterations - tol : float (default = 1e-3) + tol : float, default=1e-3 The tolerance for the optimization: if the updates are smaller than tol, the optimization code checks the dual gap for optimality and continues until it is smaller than tol. 
- solver : {'cd', 'qn'} (default='cd') + solver : {'cd', 'qn'}, default='cd' Choose an algorithm: * 'cd' - coordinate descent * 'qn' - quasi-newton You may find the alternative 'qn' algorithm is faster when the number - of features is sufficiently large, but the sample size is small. - selection : {'cyclic', 'random'} (default='cyclic') - If set to 'random', a random coefficient is updated every iteration - rather than looping over features sequentially by default. - This (setting to 'random') often leads to significantly faster - convergence especially when tol is higher than 1e-4. + of features is sufficiently large but the sample size is small. + selection : {'cyclic', 'random'}, default='cyclic' + How selections are made when `solver="cd"`. If set to 'random', a + random coefficient is updated every iteration rather than looping over + features sequentially by default. This (setting to 'random') often + leads to significantly faster convergence especially when tol is higher + than 1e-4. + normalize : boolean, default=False + If True, the predictors in X will be normalized by dividing by the + column-wise standard deviation. + If False, no scaling will be done. + Note: this is in contrast to sklearn's deprecated `normalize` flag, + which divides by the column-wise L2 norm; but this is the same as if + using sklearn's StandardScaler. handle : cuml.Handle Specifies the cuml.handle that holds internal CUDA state for computations in this model. Most importantly, this specifies the CUDA @@ -129,13 +89,39 @@ class ElasticNet( ---------- coef_ : array, shape (n_features) The estimated coefficients for the linear regression model. - intercept_ : array - The independent term. If `fit_intercept` is False, will be 0. + intercept_ : float + The independent term, will be 0 if `fit_intercept` is False. Notes ----- For additional docs, see `scikitlearn's ElasticNet `_. 
+ + Examples + -------- + >>> import cupy as cp + >>> import cudf + >>> from cuml.linear_model import ElasticNet + >>> enet = ElasticNet(alpha = 0.1, l1_ratio=0.5, solver='qn') + >>> X = cudf.DataFrame() + >>> X['col1'] = cp.array([0, 1, 2], dtype = cp.float32) + >>> X['col2'] = cp.array([0, 1, 2], dtype = cp.float32) + >>> y = cudf.Series(cp.array([0.0, 1.0, 2.0], dtype = cp.float32) ) + >>> result_enet = enet.fit(X, y) + >>> print(result_enet.coef_) + 0 0.445... + 1 0.445... + dtype: float32 + >>> print(result_enet.intercept_) + 0.108433... + >>> X_new = cudf.DataFrame() + >>> X_new['col1'] = cp.array([3,2], dtype = cp.float32) + >>> X_new['col2'] = cp.array([5,5], dtype = cp.float32) + >>> preds = result_enet.predict(X_new) + >>> print(preds) + 0 3.674... + 1 3.228... + dtype: float32 """ coef_ = CumlArrayDescriptor(order="F") @@ -144,15 +130,16 @@ class ElasticNet( @classmethod def _get_param_names(cls): - return super()._get_param_names() + [ + return [ + *super()._get_param_names(), "alpha", "l1_ratio", "fit_intercept", - "normalize", "max_iter", "tol", "solver", "selection", + "normalize", ] @classmethod @@ -209,156 +196,91 @@ def _attrs_to_cpu(self, model): def __init__( self, - *, alpha=1.0, + *, l1_ratio=0.5, fit_intercept=True, - normalize=False, max_iter=1000, tol=1e-3, solver="cd", selection="cyclic", + normalize=False, handle=None, output_type=None, verbose=False, ): - """ - Initializes the elastic-net regression class. - - Parameters - ---------- - alpha : float or double. - l1_ratio : float or double. - fit_intercept: boolean. - normalize: boolean. - max_iter: int - tol: float or double. - solver: str, 'cd' or 'qn' - selection : str, 'cyclic', or 'random' - - For additional docs, see `scikitlearn's ElasticNet - `_. 
- """ - - # Hard-code verbosity as CoordinateDescent does not have verbosity super().__init__( handle=handle, verbose=verbose, output_type=output_type ) - self._check_alpha(alpha) - self._check_l1_ratio(l1_ratio) - self.alpha = alpha self.l1_ratio = l1_ratio self.fit_intercept = fit_intercept - self.solver = solver - self.normalize = normalize self.max_iter = max_iter self.tol = tol - self.solver_model = None - if selection in ["cyclic", "random"]: - self.selection = selection - else: - msg = "selection {!r} is not supported" - raise TypeError(msg.format(selection)) + self.solver = solver + self.selection = selection + self.normalize = normalize - self.intercept_value = 0.0 + @generate_docstring() + def fit( + self, X, y, sample_weight=None, *, convert_dtype=True + ) -> "ElasticNet": + """ + Fit the model with X and y. - shuffle = False - if self.selection == "random": - shuffle = True + """ + if self.alpha < 0.0: + raise ValueError(f"Expected alpha >= 0, got {self.alpha}") + if self.selection not in ["cyclic", "random"]: + raise ValueError(f"selection {self.selection!r} is not supported") + if self.l1_ratio < 0.0 or self.l1_ratio > 1.0: + raise ValueError( + f"Expected 0.0 <= l1_ratio <= 1.0, got {self.l1_ratio}" + ) - if solver == "qn": - pams = signature(self.__init__).parameters - if pams["selection"].default != selection: - warn( - "Parameter 'selection' has no effect " - "when 'qn' solver is used." - ) - if pams["normalize"].default != normalize: - warn( - "Parameter 'normalize' has no effect " - "when 'qn' solver is used." 
+ if self.solver == "qn": + if self.normalize: + raise ValueError( + "`normalize=True` is not supported with `solver='qn'" ) - self.solver_model = QN( + solver = QN( + handle=self.handle, + verbose=self.verbose, + output_type=self.output_type, fit_intercept=self.fit_intercept, l1_strength=self.alpha * self.l1_ratio, l2_strength=self.alpha * (1.0 - self.l1_ratio), - max_iter=self.max_iter, - handle=self.handle, loss="l2", - tol=self.tol, penalty_normalized=False, - verbose=self.verbose, + max_iter=self.max_iter, + tol=self.tol, + ).fit( + X, y, sample_weight=sample_weight, convert_dtype=convert_dtype ) - elif solver == "cd": - self.solver_model = CD( - fit_intercept=self.fit_intercept, - normalize=self.normalize, + + coef = CumlArray(data=solver.coef_.to_output("cupy").flatten()) + intercept = solver.intercept_.item() + elif self.solver == "cd": + coef, intercept = fit_coordinate_descent( + X, + y, + sample_weight=sample_weight, + convert_dtype=convert_dtype, alpha=self.alpha, + fit_intercept=self.fit_intercept, l1_ratio=self.l1_ratio, - shuffle=shuffle, + normalize=self.normalize, + shuffle=self.selection == "random", max_iter=self.max_iter, - handle=self.handle, tol=self.tol, + handle=self.handle, ) else: - raise TypeError(f"solver {solver} is not supported") - - def _check_alpha(self, alpha): - if alpha <= 0.0: - msg = "alpha value has to be positive" - raise ValueError(msg.format(alpha)) - - def _check_l1_ratio(self, l1_ratio): - if l1_ratio < 0.0 or l1_ratio > 1.0: - msg = "l1_ratio value has to be between 0.0 and 1.0" - raise ValueError(msg.format(l1_ratio)) - - @generate_docstring() - def fit( - self, X, y, sample_weight=None, *, convert_dtype=True - ) -> "ElasticNet": - """ - Fit the model with X and y. 
- - """ - X_m, _, self.n_features_in_, self.dtype = input_to_cuml_array(X) - y_m, _, _, _ = input_to_cuml_array(y) - if hasattr(X_m, "index"): - self.feature_names_in_ = X_m.index - - # Check for multi-target regression - if (self.solver in ["cd", "qn"]) and y_m.ndim > 1 and y_m.shape[1] > 1: - raise ValueError( - f"The {self.solver} solver does not support " - "multi-target regression." - ) - - self.solver_model.fit( - X_m, y_m, convert_dtype=convert_dtype, sample_weight=sample_weight - ) - if isinstance(self.solver_model, QN): - coefs = self.solver_model.coef_ - self.coef_ = CumlArray( - data=coefs, - index=coefs._index, - dtype=coefs.dtype, - order=coefs.order, - shape=(coefs.shape[1],), - ) - self.intercept_ = self.solver_model.intercept_.item() - else: - self.coef_ = self.solver_model.coef_ - self.intercept_ = self.solver_model.intercept_ + raise ValueError(f"solver {self.solver} is not supported") - return self + self.coef_ = coef + self.intercept_ = intercept - def set_params(self, **params): - super().set_params(**params) - if "selection" in params: - params.pop("selection") - params["shuffle"] = self.selection == "random" - self.solver_model.set_params(**params) return self diff --git a/python/cuml/cuml/linear_model/lasso.py b/python/cuml/cuml/linear_model/lasso.py index 3f7a2df8db..845e2ed187 100644 --- a/python/cuml/cuml/linear_model/lasso.py +++ b/python/cuml/cuml/linear_model/lasso.py @@ -7,47 +7,10 @@ class Lasso(ElasticNet): - """ - Lasso extends LinearRegression by providing L1 regularization on the - coefficients when predicting response y with a linear combination of the - predictors in X. It can zero some of the coefficients for feature - selection and improves the conditioning of the problem. - - cuML's Lasso can take array-like objects, either in host as - NumPy arrays or in device (as Numba or `__cuda_array_interface__` - compliant), in addition to cuDF objects. It uses coordinate descent to fit - a linear model. 
- - Examples - -------- + Linear Model trained with L1 prior as regularizer (aka the Lasso). - .. code-block:: python - - >>> import numpy as np - >>> import cudf - >>> from cuml.linear_model import Lasso - >>> ls = Lasso(alpha = 0.1, solver='qn') - >>> X = cudf.DataFrame() - >>> X['col1'] = np.array([0, 1, 2], dtype = np.float32) - >>> X['col2'] = np.array([0, 1, 2], dtype = np.float32) - >>> y = cudf.Series( np.array([0.0, 1.0, 2.0], dtype = np.float32) ) - >>> result_lasso = ls.fit(X, y) - >>> print(result_lasso.coef_) - 0 0.425 - 1 0.425 - dtype: float32 - >>> print(result_lasso.intercept_) - 0.150000... - - >>> X_new = cudf.DataFrame() - >>> X_new['col1'] = np.array([3,2], dtype = np.float32) - >>> X_new['col2'] = np.array([5,5], dtype = np.float32) - >>> preds = result_lasso.predict(X_new) - >>> print(preds) - 0 3.549997 - 1 3.124997 - dtype: float32 + This is the same as ``ElasticNet(l1_ratio=1.0)`` (no L2 penalty). Parameters ---------- @@ -61,13 +24,6 @@ class Lasso(ElasticNet): fit_intercept : boolean (default = True) If True, Lasso tries to correct for the global mean of y. If False, the model expects that you have centered the data. - normalize : boolean (default = False) - If True, the predictors in X will be normalized by dividing by the - column-wise standard deviation. - If False, no scaling will be done. - Note: this is in contrast to sklearn's deprecated `normalize` flag, - which divides by the column-wise L2 norm; but this is the same as if - using sklearn's StandardScaler. max_iter : int (default = 1000) The maximum number of iterations tol : float (default = 1e-3) @@ -87,6 +43,13 @@ class Lasso(ElasticNet): rather than looping over features sequentially by default. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4. + normalize : boolean (default = False) + If True, the predictors in X will be normalized by dividing by the + column-wise standard deviation. 
+ If False, no scaling will be done. + Note: this is in contrast to sklearn's deprecated `normalize` flag, + which divides by the column-wise L2 norm; but this is the same as if + using sklearn's StandardScaler. handle : cuml.Handle Specifies the cuml.handle that holds internal CUDA state for computations in this model. Most importantly, this specifies the CUDA @@ -115,6 +78,33 @@ class Lasso(ElasticNet): ----- For additional docs, see `scikitlearn's Lasso `_. + + Examples + -------- + >>> import numpy as np + >>> import cudf + >>> from cuml.linear_model import Lasso + >>> ls = Lasso(alpha = 0.1, solver='qn') + >>> X = cudf.DataFrame() + >>> X['col1'] = np.array([0, 1, 2], dtype = np.float32) + >>> X['col2'] = np.array([0, 1, 2], dtype = np.float32) + >>> y = cudf.Series( np.array([0.0, 1.0, 2.0], dtype = np.float32) ) + >>> result_lasso = ls.fit(X, y) + >>> print(result_lasso.coef_) + 0 0.425 + 1 0.425 + dtype: float32 + >>> print(result_lasso.intercept_) + 0.150000... + + >>> X_new = cudf.DataFrame() + >>> X_new['col1'] = np.array([3,2], dtype = np.float32) + >>> X_new['col2'] = np.array([5,5], dtype = np.float32) + >>> preds = result_lasso.predict(X_new) + >>> print(preds) + 0 3.549997 + 1 3.124997 + dtype: float32 """ _cpu_class_path = "sklearn.linear_model.Lasso" @@ -136,28 +126,28 @@ def _params_to_cpu(self): def __init__( self, - *, alpha=1.0, + *, fit_intercept=True, - normalize=False, max_iter=1000, tol=1e-3, solver="cd", selection="cyclic", + normalize=False, handle=None, output_type=None, verbose=False, ): # Lasso is just a special case of ElasticNet super().__init__( - l1_ratio=1.0, alpha=alpha, + l1_ratio=1.0, fit_intercept=fit_intercept, - normalize=normalize, max_iter=max_iter, tol=tol, solver=solver, selection=selection, + normalize=normalize, handle=handle, output_type=output_type, verbose=verbose, diff --git a/python/cuml/cuml/solvers/cd.pyx b/python/cuml/cuml/solvers/cd.pyx index f3ec7b9ab8..cb15781288 100644 --- a/python/cuml/cuml/solvers/cd.pyx 
+++ b/python/cuml/cuml/solvers/cd.pyx @@ -1,13 +1,8 @@ # SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # - -# distutils: language = c++ - import numpy as np -from libc.stdint cimport uintptr_t - from cuml.common import CumlArray from cuml.common.array_descriptor import CumlArrayDescriptor from cuml.common.doc_utils import generate_docstring @@ -15,12 +10,14 @@ from cuml.internals.base import Base from cuml.internals.input_utils import input_to_cuml_array from cuml.internals.mixins import FMajorInputTagMixin +from libc.stdint cimport uintptr_t from libcpp cimport bool from pylibraft.common.handle cimport handle_t +__all__ = ("fit_coordinate_descent", "CD") -cdef extern from "cuml/solvers/solver.hpp" namespace "ML::Solver" nogil: +cdef extern from "cuml/solvers/solver.hpp" namespace "ML::Solver" nogil: cdef void cdFit(handle_t& handle, float *input, int n_rows, @@ -74,8 +71,149 @@ cdef extern from "cuml/solvers/solver.hpp" namespace "ML::Solver" nogil: int loss) except + -class CD(Base, - FMajorInputTagMixin): +def fit_coordinate_descent( + X, + y, + sample_weight=None, + *, + convert_dtype=True, + loss="squared_loss", + double alpha=0.0001, + double l1_ratio=0.15, + bool fit_intercept=True, + bool normalize=False, + int max_iter=1000, + double tol=1e-3, + bool shuffle=True, + handle=None, +): + """Fit a linear model using coordinate descent. + + Parameters + ---------- + X : array-like, shape=(n_samples, n_features) + The training data. + y : array-like, shape=(n_samples,) + The target values. + sample_weight : None or array-like, shape=(n_samples,) + The sample weights. + convert_dtype : bool, default=True + When set to True, will convert array inputs to be of the proper dtypes. + **kwargs + Remaining keyword arguments match the hyperparameters + to ``CD``, see the ``CD`` docs for more information. 
+ + Returns + ------- + coef : CumlArray, shape=(n_features,) + The fit coefficients + intercept : float + The fit intercept, or 0 if `fit_intercept=False` + """ + # Process and validate parameters + if loss != "squared_loss": + raise ValueError(f"{loss=!r} is not supported") + + if alpha < 0.0: + raise ValueError(f"Expected alpha >= 0, got {alpha}") + + # Process and validate input arrays + cdef int n_rows, n_cols + X, n_rows, n_cols, _ = input_to_cuml_array( + X, + convert_to_dtype=(np.float32 if convert_dtype else None), + check_dtype=[np.float32, np.float64], + ) + + if n_rows < 2: + raise ValueError( + f"Found array with {n_rows} sample(s) (shape={X.shape}) while a " + f"minimum of 2 is required." + ) + if n_cols < 1: + raise ValueError( + f"Found array with {n_cols} feature(s) (shape={X.shape}) while " + f"a minimum of 1 is required." + ) + + y = input_to_cuml_array( + y, + check_dtype=X.dtype, + convert_to_dtype=(X.dtype if convert_dtype else None), + check_rows=n_rows, + check_cols=1, + ).array + + if sample_weight is not None: + sample_weight = input_to_cuml_array( + sample_weight, + check_dtype=X.dtype, + convert_to_dtype=(X.dtype if convert_dtype else None), + check_rows=n_rows, + check_cols=1, + ).array + + # Allocate outputs + coef = CumlArray.zeros(n_cols, dtype=X.dtype) + + cdef uintptr_t X_ptr = X.ptr + cdef uintptr_t y_ptr = y.ptr + cdef uintptr_t sample_weight_ptr = ( + 0 if sample_weight is None else sample_weight.ptr + ) + cdef uintptr_t coef_ptr = coef.ptr + + cdef float intercept_f32 + cdef double intercept_f64 + cdef handle_t* handle_ = handle.getHandle() + cdef bool is_float32 = X.dtype == np.float32 + + # Perform fit + with nogil: + if is_float32: + cdFit( + handle_[0], + X_ptr, + n_rows, + n_cols, + y_ptr, + coef_ptr, + &intercept_f32, + fit_intercept, + normalize, + max_iter, + 0, + alpha, + l1_ratio, + shuffle, + tol, + sample_weight_ptr + ) + else: + cdFit( + handle_[0], + X_ptr, + n_rows, + n_cols, + y_ptr, + coef_ptr, + 
&intercept_f64, + fit_intercept, + normalize, + max_iter, + 0, + alpha, + l1_ratio, + shuffle, + tol, + sample_weight_ptr + ) + handle.sync() + + return coef, intercept_f32 if is_float32 else intercept_f64 + + +class CD(Base, FMajorInputTagMixin): """ Coordinate Descent (CD) is a very common optimization algorithm that minimizes along coordinate directions to find the minimum of a function. @@ -84,40 +222,6 @@ class CD(Base, input dataset.algorithm The CD algorithm currently works with linear regression and ridge, lasso, and elastic-net penalties. - Examples - -------- - .. code-block:: python - - >>> import cupy as cp - >>> import cudf - >>> from cuml.solvers import CD as cumlCD - - >>> cd = cumlCD(alpha=0.0) - - >>> X = cudf.DataFrame() - >>> X['col1'] = cp.array([1,1,2,2], dtype=cp.float32) - >>> X['col2'] = cp.array([1,2,2,3], dtype=cp.float32) - - >>> y = cudf.Series(cp.array([6.0, 8.0, 9.0, 11.0], dtype=cp.float32)) - - >>> cd.fit(X,y) - CD() - >>> print(cd.coef_) # doctest: +SKIP - 0 1.001... - 1 1.998... - dtype: float32 - >>> print(cd.intercept_) # doctest: +SKIP - 3.00... - >>> X_new = cudf.DataFrame() - >>> X_new['col1'] = cp.array([3,2], dtype=cp.float32) - >>> X_new['col2'] = cp.array([5,5], dtype=cp.float32) - - >>> preds = cd.predict(X_new) - >>> print(preds) # doctest: +SKIP - 0 15.997... - 1 14.995... - dtype: float32 - Parameters ---------- loss : 'squared_loss' @@ -165,21 +269,59 @@ class CD(Base, (`cuml.global_settings.output_type`) will be used. See :ref:`output-data-type-configuration` for more info. + Examples + -------- + >>> import cupy as cp + >>> import cudf + >>> from cuml.solvers import CD + + >>> cd = CD(alpha=0.0) + + >>> X = cudf.DataFrame() + >>> X['col1'] = cp.array([1,1,2,2], dtype=cp.float32) + >>> X['col2'] = cp.array([1,2,2,3], dtype=cp.float32) + + >>> y = cudf.Series(cp.array([6.0, 8.0, 9.0, 11.0], dtype=cp.float32)) + + >>> cd.fit(X,y) + CD() + >>> print(cd.coef_) # doctest: +SKIP + 0 1.001... + 1 1.998... 
+ dtype: float32 + >>> print(cd.intercept_) # doctest: +SKIP + 3.00... + >>> X_new = cudf.DataFrame() + >>> X_new['col1'] = cp.array([3,2], dtype=cp.float32) + >>> X_new['col2'] = cp.array([5,5], dtype=cp.float32) + + >>> preds = cd.predict(X_new) + >>> print(preds) # doctest: +SKIP + 0 15.997... + 1 14.995... + dtype: float32 """ - coef_ = CumlArrayDescriptor() + @classmethod + def _get_param_names(cls): + return [ + *super()._get_param_names(), + "loss", + "alpha", + "l1_ratio", + "fit_intercept", + "normalize", + "max_iter", + "tol", + "shuffle", + ] + def __init__(self, *, loss='squared_loss', alpha=0.0001, l1_ratio=0.15, fit_intercept=True, normalize=False, max_iter=1000, tol=1e-3, shuffle=True, handle=None, output_type=None, verbose=False): - if loss not in ['squared_loss']: - msg = "loss {!r} is not supported" - raise NotImplementedError(msg.format(loss)) - - super().__init__(handle=handle, - verbose=verbose, - output_type=output_type) + super().__init__(handle=handle, verbose=verbose, output_type=output_type) self.loss = loss self.alpha = alpha @@ -189,107 +331,29 @@ class CD(Base, self.max_iter = max_iter self.tol = tol self.shuffle = shuffle - self.intercept_value = 0.0 - self.coef_ = None - self.intercept_ = None - - def _check_alpha(self, alpha): - for el in alpha: - if el <= 0.0: - msg = "alpha values have to be positive" - raise TypeError(msg.format(alpha)) - - def _get_loss_int(self): - return { - 'squared_loss': 0, - }[self.loss] @generate_docstring() def fit(self, X, y, convert_dtype=True, sample_weight=None) -> "CD": """ Fit the model with X and y. 
- """ - cdef uintptr_t sample_weight_ptr - X_m, n_rows, self.n_cols, self.dtype = \ - input_to_cuml_array(X, - convert_to_dtype=(np.float32 if convert_dtype - else None), - check_dtype=[np.float32, np.float64]) - - y_m, *_ = \ - input_to_cuml_array(y, check_dtype=self.dtype, - convert_to_dtype=(self.dtype if convert_dtype - else None), - check_rows=n_rows, check_cols=1) - - if sample_weight is not None: - sample_weight_m, _, _, _ = \ - input_to_cuml_array(sample_weight, check_dtype=self.dtype, - convert_to_dtype=( - self.dtype if convert_dtype else None), - check_rows=n_rows, check_cols=1) - sample_weight_ptr = sample_weight_m.ptr - else: - sample_weight_ptr = 0 - - cdef uintptr_t _X_ptr = X_m.ptr - cdef uintptr_t _y_ptr = y_m.ptr - - self.n_alpha = 1 - - self.coef_ = CumlArray.zeros(self.n_cols, dtype=self.dtype) - cdef uintptr_t _coef_ptr = self.coef_.ptr - - cdef float _c_intercept_f32 - cdef double _c_intercept2_f64 - - cdef handle_t* handle_ = self.handle.getHandle() - - if self.dtype == np.float32: - cdFit(handle_[0], - _X_ptr, - n_rows, - self.n_cols, - _y_ptr, - _coef_ptr, - &_c_intercept_f32, - self.fit_intercept, - self.normalize, - self.max_iter, - self._get_loss_int(), - self.alpha, - self.l1_ratio, - self.shuffle, - self.tol, - sample_weight_ptr) - - self.intercept_ = _c_intercept_f32 - else: - cdFit(handle_[0], - _X_ptr, - n_rows, - self.n_cols, - _y_ptr, - _coef_ptr, - &_c_intercept2_f64, - self.fit_intercept, - self.normalize, - self.max_iter, - self._get_loss_int(), - self.alpha, - self.l1_ratio, - self.shuffle, - self.tol, - sample_weight_ptr) - - self.intercept_ = _c_intercept2_f64 - - self.handle.sync() - del X_m - del y_m - if sample_weight is not None: - del sample_weight_m + coef, intercept = fit_coordinate_descent( + X, + y, + sample_weight=sample_weight, + convert_dtype=convert_dtype, + loss=self.loss, + alpha=self.alpha, + l1_ratio=self.l1_ratio, + fit_intercept=self.fit_intercept, + normalize=self.normalize, + max_iter=self.max_iter, + 
tol=self.tol, + shuffle=self.shuffle, + handle=self.handle, + ) + self.coef_ = coef + self.intercept_ = intercept return self @@ -300,57 +364,47 @@ class CD(Base, def predict(self, X, convert_dtype=True) -> CumlArray: """ Predicts the y for X. - """ - X_m, n_rows, _n_cols, _ = \ - input_to_cuml_array(X, check_dtype=self.dtype, - convert_to_dtype=(self.dtype if convert_dtype - else None), - check_cols=self.n_cols) - - cdef uintptr_t _X_ptr = X_m.ptr - cdef uintptr_t _coef_ptr = self.coef_.ptr - - preds = CumlArray.zeros(n_rows, dtype=self.dtype, - index=X_m.index) - cdef uintptr_t _preds_ptr = preds.ptr - + cdef int n_rows, n_cols + X, n_rows, n_cols, _ = input_to_cuml_array( + X, + check_dtype=self.coef_.dtype, + convert_to_dtype=(self.coef_.dtype if convert_dtype else None), + check_cols=self.coef_.shape[0], + ) + + preds = CumlArray.zeros(n_rows, dtype=self.coef_.dtype, index=X.index) + + cdef uintptr_t X_ptr = X.ptr + cdef uintptr_t preds_ptr = preds.ptr + cdef uintptr_t coef_ptr = self.coef_.ptr + cdef double intercept = self.intercept_ cdef handle_t* handle_ = self.handle.getHandle() - - if self.dtype == np.float32: - cdPredict(handle_[0], - _X_ptr, - n_rows, - _n_cols, - _coef_ptr, - self.intercept_, - _preds_ptr, - self._get_loss_int()) - else: - cdPredict(handle_[0], - _X_ptr, - n_rows, - _n_cols, - _coef_ptr, - self.intercept_, - _preds_ptr, - self._get_loss_int()) - + cdef bool is_float32 = self.coef_.dtype == np.float32 + + with nogil: + if is_float32: + cdPredict( + handle_[0], + X_ptr, + n_rows, + n_cols, + coef_ptr, + intercept, + preds_ptr, + 0, + ) + else: + cdPredict( + handle_[0], + X_ptr, + n_rows, + n_cols, + coef_ptr, + intercept, + preds_ptr, + 0, + ) self.handle.sync() - del X_m - return preds - - @classmethod - def _get_param_names(cls): - return super()._get_param_names() + [ - "loss", - "alpha", - "l1_ratio", - "fit_intercept", - "normalize", - "max_iter", - "tol", - "shuffle", - ] diff --git 
a/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml b/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml index b433525b29..f7e42f956a 100644 --- a/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml +++ b/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml @@ -261,12 +261,6 @@ - "sklearn.linear_model.tests.test_ridge::test_ridge_sample_weight_consistency[42-saga-wide-csr_matrix-False]" - "sklearn.linear_model.tests.test_ridge::test_ridgecv_sample_weight" - "sklearn.linear_model.tests.test_sag::test_step_size_alpha_error" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_enet_toy_explicit_sparse_input[lil_array]" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_enet_toy_explicit_sparse_input[lil_matrix]" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_enet_toy_list_input[csc_array-False]" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_enet_toy_list_input[csc_array-True]" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_enet_toy_list_input[csc_matrix-False]" - - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_enet_toy_list_input[csc_matrix-True]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_same_multiple_output_sparse_dense[coo_array]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_same_multiple_output_sparse_dense[coo_matrix]" - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-False-ElasticNet]" @@ -1301,10 +1295,8 @@ - "sklearn.tests.test_common::test_estimators[DBSCAN()-check_sample_weights_not_an_array]" - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_complex_data]" - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_dtype_object]" - - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_estimators_empty_data_messages]" - 
"sklearn.tests.test_common::test_estimators[ElasticNet()-check_estimators_nan_inf]" - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_fit1d]" - - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_fit2d_1sample]" - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_non_transformer_estimators_n_iter]" - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_regressor_data_not_an_array]" @@ -1340,10 +1332,8 @@ - "sklearn.tests.test_common::test_estimators[KNeighborsRegressor()-check_supervised_y_no_nan]" - "sklearn.tests.test_common::test_estimators[Lasso()-check_complex_data]" - "sklearn.tests.test_common::test_estimators[Lasso()-check_dtype_object]" - - "sklearn.tests.test_common::test_estimators[Lasso()-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_estimators[Lasso()-check_estimators_nan_inf]" - "sklearn.tests.test_common::test_estimators[Lasso()-check_fit1d]" - - "sklearn.tests.test_common::test_estimators[Lasso()-check_fit2d_1sample]" - "sklearn.tests.test_common::test_estimators[Lasso()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[Lasso()-check_non_transformer_estimators_n_iter]" - "sklearn.tests.test_common::test_estimators[Lasso()-check_regressor_data_not_an_array]" diff --git a/python/cuml/tests/test_linear_model.py b/python/cuml/tests/test_linear_model.py index 1ed0e5e1bf..df29502c0c 100644 --- a/python/cuml/tests/test_linear_model.py +++ b/python/cuml/tests/test_linear_model.py @@ -1220,10 +1220,7 @@ def test_elasticnet_model(datatype, solver, nrows, column_info, ntargets): cuelastic = cuElasticNet(alpha=0.1, l1_ratio=0.5, solver=solver) if ntargets > 1: - with pytest.raises( - ValueError, - match="The .* solver does not support multi-target regression.", - ): + with pytest.raises(ValueError, match="Expected 1 columns"): cuelastic.fit(X_train, y_train) return diff --git 
a/python/cuml/tests/test_sklearn_compatibility.py b/python/cuml/tests/test_sklearn_compatibility.py index 6584a53259..7bd4d77186 100644 --- a/python/cuml/tests/test_sklearn_compatibility.py +++ b/python/cuml/tests/test_sklearn_compatibility.py @@ -542,7 +542,6 @@ Lasso: { "check_estimator_tags_renamed": "No support for modern tags infrastructure", "check_no_attributes_set_in_init": "Lasso sets attributes during init", - "check_dont_overwrite_parameters": "Lasso overwrites parameters during fit", "check_estimators_unfitted": "Lasso does not raise NotFittedError before fit", "check_do_not_raise_errors_in_init_or_set_params": "Lasso raises errors in init or set_params", "check_n_features_in_after_fitting": "Lasso does not check n_features_in consistency", @@ -550,7 +549,6 @@ "check_sample_weights_list": "Lasso does not handle list sample weights", "check_complex_data": "Lasso does not handle complex data", "check_dtype_object": "Lasso does not handle object dtype", - "check_estimators_empty_data_messages": "Lasso does not handle empty data", "check_estimators_nan_inf": "Lasso does not check for NaN and inf", "check_estimator_sparse_tag": "Lasso does not support sparse data", "check_estimator_sparse_array": "Lasso does not handle sparse arrays gracefully", @@ -563,8 +561,6 @@ "check_supervised_y_no_nan": "Lasso does not check for NaN in y", "check_non_transformer_estimators_n_iter": "Lasso does not have n_iter_ attribute", "check_parameters_default_constructible": "Lasso parameters are mutated on init", - "check_fit2d_1sample": "Lasso does not handle single sample", - "check_set_params": "Lasso does not handle set_params properly", "check_fit1d": "Lasso does not raise ValueError for 1D input", "check_fit2d_predict1d": "Lasso does not handle 1D prediction input gracefully", "check_requires_y_none": "Lasso does not handle y=None", @@ -572,7 +568,6 @@ ElasticNet: { "check_estimator_tags_renamed": "No support for modern tags infrastructure", 
"check_no_attributes_set_in_init": "ElasticNet sets attributes during init", - "check_dont_overwrite_parameters": "ElasticNet overwrites parameters during fit", "check_estimators_unfitted": "ElasticNet does not raise NotFittedError before fit", "check_do_not_raise_errors_in_init_or_set_params": "ElasticNet raises errors in init or set_params", "check_n_features_in_after_fitting": "ElasticNet does not check n_features_in consistency", @@ -580,7 +575,6 @@ "check_sample_weights_list": "ElasticNet does not handle list sample weights", "check_complex_data": "ElasticNet does not handle complex data", "check_dtype_object": "ElasticNet does not handle object dtype", - "check_estimators_empty_data_messages": "ElasticNet does not handle empty data", "check_estimators_nan_inf": "ElasticNet does not check for NaN and inf", "check_estimator_sparse_tag": "ElasticNet does not support sparse data", "check_estimator_sparse_array": "ElasticNet does not handle sparse arrays gracefully", @@ -593,8 +587,6 @@ "check_supervised_y_no_nan": "ElasticNet does not check for NaN in y", "check_non_transformer_estimators_n_iter": "ElasticNet does not have n_iter_ attribute", "check_parameters_default_constructible": "ElasticNet parameters are mutated on init", - "check_fit2d_1sample": "ElasticNet does not handle single sample", - "check_set_params": "ElasticNet does not handle set_params properly", "check_fit1d": "ElasticNet does not raise ValueError for 1D input", "check_fit2d_predict1d": "ElasticNet does not handle 1D prediction input gracefully", "check_requires_y_none": "ElasticNet does not handle y=None", diff --git a/python/cuml/tests/test_solver_attributes.py b/python/cuml/tests/test_solver_attributes.py index eb86773864..9d1bddd89a 100644 --- a/python/cuml/tests/test_solver_attributes.py +++ b/python/cuml/tests/test_solver_attributes.py @@ -75,14 +75,9 @@ def test_elastic_net_attributes(): clf.fit(X, y) attrs = [ - "dtype", - "solver_model", + "n_features_in_", "coef_", "intercept_", - 
"l1_ratio", - "alpha", - "max_iter", - "fit_intercept", ] for attr in attrs: assert hasattr(clf, attr) @@ -94,12 +89,9 @@ def test_lasso_attributes(): clf.fit(X, y) attrs = [ - "dtype", - "solver_model", + "n_features_in_", "coef_", "intercept_", - "solver_model", - "l1_ratio", ] for attr in attrs: assert hasattr(clf, attr)