Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions python/cuml/cuml/dask/common/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
#

import warnings
from collections.abc import Iterable
from functools import wraps

Expand Down Expand Up @@ -470,3 +471,17 @@ def _transform_func(model, data, **kwargs):

def _inverse_transform_func(model, data, **kwargs):
return model.inverse_transform(data, **kwargs)


def check_deprecated_normalize(model):
    """Warn if the deprecated `normalize` option is used.

    Parameters
    ----------
    model : dask estimator instance
        Estimator whose constructor kwargs are stored in ``model.kwargs``;
        the deprecated ``normalize`` flag is looked up there.
    """
    if model.kwargs.get("normalize"):
        cls_name = type(model).__name__
        warnings.warn(
            (
                f"The `normalize` option to `{cls_name}` was deprecated in "
                f"25.12 and will be removed in 26.02. Please use a `StandardScaler` "
                f"to normalize your data external to `{cls_name}`."
            ),
            FutureWarning,
            # Attribute the warning to the user's fit() call site, not this
            # internal helper (helper -> fit -> user code = 3 frames).
            stacklevel=3,
        )
15 changes: 9 additions & 6 deletions python/cuml/cuml/dask/linear_model/elastic_net.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# SPDX-License-Identifier: Apache-2.0
#

from cuml.dask.common.base import BaseEstimator
from cuml.dask.common.base import BaseEstimator, check_deprecated_normalize
from cuml.dask.solvers import CD


Expand Down Expand Up @@ -36,10 +36,13 @@ class ElasticNet(BaseEstimator):
fit_intercept : boolean (default = True)
If True, Lasso tries to correct for the global mean of y.
If False, the model expects that you have centered the data.
normalize : boolean (default = False)
If True, the predictors in X will be normalized by dividing by its L2
norm.
If False, no scaling will be done.
normalize : boolean, default=False

.. deprecated:: 25.12
``normalize`` is deprecated and will be removed in 26.02. When
needed, please use a ``StandardScaler`` to normalize your data
before passing to ``fit``.

max_iter : int (default = 1000)
The maximum number of iterations
tol : float (default = 1e-3)
Expand Down Expand Up @@ -103,7 +106,7 @@ def fit(self, X, y):
Dense matrix (floats or doubles) of shape (n_samples, n_features).

"""

check_deprecated_normalize(self)
self.solver.fit(X, y)
return self

Expand Down
14 changes: 9 additions & 5 deletions python/cuml/cuml/dask/linear_model/lasso.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# SPDX-License-Identifier: Apache-2.0
#

from cuml.dask.common.base import BaseEstimator
from cuml.dask.common.base import BaseEstimator, check_deprecated_normalize
from cuml.dask.solvers import CD


Expand All @@ -30,10 +30,13 @@ class Lasso(BaseEstimator):
fit_intercept : boolean (default = True)
If True, Lasso tries to correct for the global mean of y.
If False, the model expects that you have centered the data.
normalize : boolean (default = False)
If True, the predictors in X will be normalized by dividing by its L2
norm.
If False, no scaling will be done.
normalize : boolean, default=False

.. deprecated:: 25.12
``normalize`` is deprecated and will be removed in 26.02. When
needed, please use a ``StandardScaler`` to normalize your data
before passing to ``fit``.

max_iter : int (default = 1000)
The maximum number of iterations
tol : float (default = 1e-3)
Expand Down Expand Up @@ -83,6 +86,7 @@ def fit(self, X, y):
Dense matrix (floats or doubles) of shape (n_samples, n_features).

"""
check_deprecated_normalize(self)

self.solver.fit(X, y)

Expand Down
12 changes: 8 additions & 4 deletions python/cuml/cuml/dask/linear_model/linear_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
BaseEstimator,
DelayedPredictionMixin,
SyncFitMixinLinearModel,
check_deprecated_normalize,
mnmg_import,
)

Expand Down Expand Up @@ -42,10 +43,12 @@ class LinearRegression(
LinearRegression adds an additional term c to correct for the global
mean of y, modeling the response as "x * beta + c".
If False, the model expects that you have centered the data.
normalize : boolean (default = False)
If True, the predictors in X will be normalized by dividing by its
L2 norm.
If False, no scaling will be done.
normalize : boolean, default=False

.. deprecated:: 25.12
``normalize`` is deprecated and will be removed in 26.02. When
needed, please use a ``StandardScaler`` to normalize your data
before passing to ``fit``.

Attributes
----------
Expand All @@ -69,6 +72,7 @@ def fit(self, X, y):
y : Dask cuDF dataframe or CuPy backed Dask Array (n_rows, 1)
Labels (outcome values)
"""
check_deprecated_normalize(self)

models = self._fit(
model_func=LinearRegression._create_model, data=(X, y)
Expand Down
12 changes: 8 additions & 4 deletions python/cuml/cuml/dask/linear_model/ridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
BaseEstimator,
DelayedPredictionMixin,
SyncFitMixinLinearModel,
check_deprecated_normalize,
mnmg_import,
)

Expand Down Expand Up @@ -46,10 +47,12 @@ class Ridge(BaseEstimator, SyncFitMixinLinearModel, DelayedPredictionMixin):
If True, Ridge adds an additional term c to correct for the global
mean of y, modeling the response as "x * beta + c".
If False, the model expects that you have centered the data.
normalize : boolean (default = False)
If True, the predictors in X will be normalized by dividing by its L2
norm.
If False, no scaling will be done.
normalize : boolean, default=False

.. deprecated:: 25.12
``normalize`` is deprecated and will be removed in 26.02. When
needed, please use a ``StandardScaler`` to normalize your data
before passing to ``fit``.

Attributes
----------
Expand Down Expand Up @@ -79,6 +82,7 @@ def fit(self, X, y):
y : Dask cuDF dataframe or CuPy backed Dask Array (n_rows, 1)
Labels (outcome values)
"""
check_deprecated_normalize(self)

models = self._fit(model_func=Ridge._create_model, data=(X, y))

Expand Down
17 changes: 9 additions & 8 deletions python/cuml/cuml/experimental/linear_model/lars.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ from cuml.common.doc_utils import generate_docstring
from cuml.internals.array import CumlArray
from cuml.internals.base import Base
from cuml.internals.mixins import RegressorMixin
from cuml.linear_model.base import check_deprecated_normalize

from pylibraft.common.handle cimport handle_t

Expand Down Expand Up @@ -78,14 +79,12 @@ class Lars(Base, RegressorMixin):
fit_intercept : boolean (default = True)
If True, Lars tries to correct for the global mean of y.
If False, the model expects that you have centered the data.
normalize : boolean (default = False)
This parameter is ignored when `fit_intercept` is set to False.
If True, the predictors in X will be normalized by removing its mean
and dividing by it's variance. If False, then the solver expects that
the data is already normalized.
normalize : boolean, default=False

.. versionchanged:: 24.06
The default of `normalize` changed from `True` to `False`.
.. deprecated:: 25.12
``normalize`` is deprecated and will be removed in 26.02. When
needed, please use a ``StandardScaler`` to normalize your data
before passing to ``fit``.

copy_X : boolean (default = True)
The solver permutes the columns of X. Set `copy_X` to True to prevent
Expand Down Expand Up @@ -157,7 +156,7 @@ class Lars(Base, RegressorMixin):
coef_ = CumlArrayDescriptor()
intercept_ = CumlArrayDescriptor()

def __init__(self, *, fit_intercept=True, normalize=True,
def __init__(self, *, fit_intercept=True, normalize=False,
handle=None, verbose=False, output_type=None, copy_X=True,
fit_path=True, n_nonzero_coefs=500, eps=None,
precompute='auto'):
Expand Down Expand Up @@ -291,6 +290,8 @@ class Lars(Base, RegressorMixin):
Fit the model with X and y.

"""
check_deprecated_normalize(self)

self._set_n_features_in(X)
self._set_output_type(X)

Expand Down
16 changes: 16 additions & 0 deletions python/cuml/cuml/linear_model/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
#
import warnings

import cuml.internals
from cuml.common.doc_utils import generate_docstring
from cuml.internals.array import CumlArray
Expand Down Expand Up @@ -47,3 +49,17 @@ def predict(self, X, *, convert_dtype=True) -> CumlArray:
out += intercept

return CumlArray(out, index=X.index)


def check_deprecated_normalize(model):
    """Warn if the deprecated `normalize` option is used.

    Parameters
    ----------
    model : estimator instance
        Estimator exposing a boolean ``normalize`` attribute.
    """
    if model.normalize:
        cls_name = type(model).__name__
        warnings.warn(
            (
                f"The `normalize` option to `{cls_name}` was deprecated in "
                f"25.12 and will be removed in 26.02. Please use a `StandardScaler` "
                f"to normalize your data external to `{cls_name}`."
            ),
            FutureWarning,
            # Attribute the warning to the user's fit() call site, not this
            # internal helper (helper -> fit -> user code = 3 frames).
            stacklevel=3,
        )
19 changes: 12 additions & 7 deletions python/cuml/cuml/linear_model/elastic_net.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
to_gpu,
)
from cuml.internals.mixins import FMajorInputTagMixin, RegressorMixin
from cuml.linear_model.base import LinearPredictMixin
from cuml.linear_model.base import (
LinearPredictMixin,
check_deprecated_normalize,
)
from cuml.solvers import QN
from cuml.solvers.cd import fit_coordinate_descent

Expand Down Expand Up @@ -62,12 +65,12 @@ class ElasticNet(
leads to significantly faster convergence especially when tol is higher
than 1e-4.
normalize : boolean, default=False
If True, the predictors in X will be normalized by dividing by the
column-wise standard deviation.
If False, no scaling will be done.
Note: this is in contrast to sklearn's deprecated `normalize` flag,
which divides by the column-wise L2 norm; but this is the same as if
using sklearn's StandardScaler.

.. deprecated:: 25.12
``normalize`` is deprecated and will be removed in 26.02. When
needed, please use a ``StandardScaler`` to normalize your data
before passing to ``fit``.

handle : cuml.Handle
Specifies the cuml.handle that holds internal CUDA state for
computations in this model. Most importantly, this specifies the CUDA
Expand Down Expand Up @@ -230,6 +233,8 @@ def fit(
Fit the model with X and y.

"""
check_deprecated_normalize(self)

if self.alpha < 0.0:
raise ValueError(f"Expected alpha >= 0, got {self.alpha}")
if self.selection not in ["cyclic", "random"]:
Expand Down
14 changes: 7 additions & 7 deletions python/cuml/cuml/linear_model/lasso.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ class Lasso(ElasticNet):
rather than looping over features sequentially by default.
This (setting to 'random') often leads to significantly faster
convergence especially when tol is higher than 1e-4.
normalize : boolean (default = False)
If True, the predictors in X will be normalized by dividing by the
column-wise standard deviation.
If False, no scaling will be done.
Note: this is in contrast to sklearn's deprecated `normalize` flag,
which divides by the column-wise L2 norm; but this is the same as if
using sklearn's StandardScaler.
normalize : boolean, default=False

.. deprecated:: 25.12
``normalize`` is deprecated and will be removed in 26.02. When
needed, please use a ``StandardScaler`` to normalize your data
before passing to ``fit``.

handle : cuml.Handle
Specifies the cuml.handle that holds internal CUDA state for
computations in this model. Most importantly, this specifies the CUDA
Expand Down
22 changes: 13 additions & 9 deletions python/cuml/cuml/linear_model/linear_regression.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ from cuml.internals.interop import (
to_gpu,
)
from cuml.internals.mixins import FMajorInputTagMixin, RegressorMixin
from cuml.linear_model.base import LinearPredictMixin
from cuml.linear_model.base import (
LinearPredictMixin,
check_deprecated_normalize,
)

from libc.stdint cimport uintptr_t
from libcpp cimport bool
Expand Down Expand Up @@ -162,14 +165,13 @@ class LinearRegression(Base,
If True, cuml will copy X when needed to avoid mutating the input array.
If you're ok with X being overwritten, setting to False may avoid a copy,
reducing memory usage for certain algorithms.
normalize : boolean (default = False)
This parameter is ignored when `fit_intercept` is set to False.
If True, the predictors in X will be normalized by dividing by the
column-wise standard deviation.
If False, no scaling will be done.
Note: this is in contrast to sklearn's deprecated `normalize` flag,
which divides by the column-wise L2 norm; but this is the same as if
using sklearn's StandardScaler.
normalize : boolean, default=False

.. deprecated:: 25.12
``normalize`` is deprecated and will be removed in 26.02. When
needed, please use a ``StandardScaler`` to normalize your data
before passing to ``fit``.

handle : cuml.Handle
Specifies the cuml.handle that holds internal CUDA state for
computations in this model. Most importantly, this specifies the CUDA
Expand Down Expand Up @@ -315,6 +317,8 @@ class LinearRegression(Base,
Fit the model with X and y.

"""
check_deprecated_normalize(self)

X_m = input_to_cuml_array(
X,
convert_to_dtype=(np.float32 if convert_dtype else None),
Expand Down
3 changes: 3 additions & 0 deletions python/cuml/cuml/linear_model/linear_regression_mg.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np

import cuml.internals
from cuml.linear_model.base import check_deprecated_normalize
from cuml.linear_model.base_mg import MGFitMixin
from cuml.linear_model.linear_regression import Algo, LinearRegression

Expand Down Expand Up @@ -43,6 +44,8 @@ cdef extern from "cuml/linear_model/ols_mg.hpp" namespace "ML::OLS::opg" nogil:
class LinearRegressionMG(MGFitMixin, LinearRegression):
@cuml.internals.api_base_return_any_skipall
def _fit(self, X, y, coef_ptr, input_desc):
check_deprecated_normalize(self)
Comment thread
jcrist marked this conversation as resolved.

cdef int algo = (
Algo.EIG if self.algorithm == "auto" else Algo.parse(self.algorithm)
)
Expand Down
21 changes: 13 additions & 8 deletions python/cuml/cuml/linear_model/ridge.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ from cuml.internals.base import Base
from cuml.internals.input_utils import input_to_cuml_array
from cuml.internals.interop import InteropMixin, UnsupportedOnGPU, to_gpu
from cuml.internals.mixins import FMajorInputTagMixin, RegressorMixin
from cuml.linear_model.base import LinearPredictMixin
from cuml.linear_model.base import (
LinearPredictMixin,
check_deprecated_normalize,
)

from libc.stdint cimport uintptr_t
from libcpp cimport bool
Expand Down Expand Up @@ -134,13 +137,13 @@ class Ridge(Base,
fit_intercept : boolean (default = True)
If True, Ridge tries to correct for the global mean of y.
If False, the model expects that you have centered the data.
normalize : boolean (default = False)
If True, the predictors in X will be normalized by dividing by the
column-wise standard deviation.
If False, no scaling will be done.
Note: this is in contrast to sklearn's deprecated `normalize` flag,
which divides by the column-wise L2 norm; but this is the same as if
using sklearn's StandardScaler.
normalize : boolean, default=False

.. deprecated:: 25.12
``normalize`` is deprecated and will be removed in 26.02. When
needed, please use a ``StandardScaler`` to normalize your data
before passing to ``fit``.

handle : cuml.Handle
Specifies the cuml.handle that holds internal CUDA state for
computations in this model. Most importantly, this specifies the CUDA
Expand Down Expand Up @@ -283,6 +286,8 @@ class Ridge(Base,
"""
Fit the model with X and y.
"""
check_deprecated_normalize(self)

cdef size_t n_rows, n_cols
X, n_rows, n_cols, dtype = input_to_cuml_array(
X,
Expand Down
Loading