diff --git a/python/cuml/cuml/linear_model/elastic_net.py b/python/cuml/cuml/linear_model/elastic_net.py
index d28efd1deb..d0e388b678 100644
--- a/python/cuml/cuml/linear_model/elastic_net.py
+++ b/python/cuml/cuml/linear_model/elastic_net.py
@@ -2,10 +2,6 @@
 # SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 #
-
-from inspect import signature
-
-from cuml.common import input_to_cuml_array
 from cuml.common.array_descriptor import CumlArrayDescriptor
 from cuml.common.doc_utils import generate_docstring
 from cuml.internals.array import CumlArray
@@ -16,98 +12,62 @@
     to_cpu,
     to_gpu,
 )
-from cuml.internals.logger import warn
 from cuml.internals.mixins import FMajorInputTagMixin, RegressorMixin
 from cuml.linear_model.base import LinearPredictMixin
-from cuml.solvers import CD, QN
+from cuml.solvers import QN
+from cuml.solvers.cd import fit_coordinate_descent
 
 
 class ElasticNet(
     Base, InteropMixin, LinearPredictMixin, RegressorMixin, FMajorInputTagMixin
 ):
-
     """
-    ElasticNet extends LinearRegression with combined L1 and L2 regularizations
-    on the coefficients when predicting response y with a linear combination of
-    the predictors in X. It can reduce the variance of the predictors, force
-    some coefficients to be small, and improves the conditioning of the
-    problem.
-
-    cuML's ElasticNet an array-like object or cuDF DataFrame, uses coordinate
-    descent to fit a linear model.
-
-    Examples
-    --------
-
-    .. code-block:: python
-
-        >>> import cupy as cp
-        >>> import cudf
-        >>> from cuml.linear_model import ElasticNet
-        >>> enet = ElasticNet(alpha = 0.1, l1_ratio=0.5, solver='qn')
-        >>> X = cudf.DataFrame()
-        >>> X['col1'] = cp.array([0, 1, 2], dtype = cp.float32)
-        >>> X['col2'] = cp.array([0, 1, 2], dtype = cp.float32)
-        >>> y = cudf.Series(cp.array([0.0, 1.0, 2.0], dtype = cp.float32) )
-        >>> result_enet = enet.fit(X, y)
-        >>> print(result_enet.coef_)
-        0    0.445...
-        1    0.445...
-        dtype: float32
-        >>> print(result_enet.intercept_)
-        0.108433...
-        >>> X_new = cudf.DataFrame()
-        >>> X_new['col1'] = cp.array([3,2], dtype = cp.float32)
-        >>> X_new['col2'] = cp.array([5,5], dtype = cp.float32)
-        >>> preds = result_enet.predict(X_new)
-        >>> print(preds)
-        0    3.674...
-        1    3.228...
-        dtype: float32
+    Linear regression with combined L1 and L2 priors as regularizer.
 
     Parameters
     ----------
-    alpha : float (default = 1.0)
+    alpha : float, default=1.0
         Constant that multiplies the L1 term.
         alpha = 0 is equivalent to an ordinary least square, solved by the
         LinearRegression object.
         For numerical reasons, using alpha = 0 with the Lasso object is not
         advised.
         Given this, you should use the LinearRegression object.
-    l1_ratio : float (default = 0.5)
+    l1_ratio : float, default=0.5
         The ElasticNet mixing parameter, with 0 <= l1_ratio <= 1.
         For l1_ratio = 0 the penalty is an L2 penalty. For l1_ratio = 1 it is
         an L1 penalty.
         For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
-    fit_intercept : boolean (default = True)
+    fit_intercept : boolean, default=True
         If True, Lasso tries to correct for the global mean of y.
         If False, the model expects that you have centered the data.
-    normalize : boolean (default = False)
-        If True, the predictors in X will be normalized by dividing by the
-        column-wise standard deviation.
-        If False, no scaling will be done.
-        Note: this is in contrast to sklearn's deprecated `normalize` flag,
-        which divides by the column-wise L2 norm; but this is the same as if
-        using sklearn's StandardScaler.
-    max_iter : int (default = 1000)
+    max_iter : int, default=1000
         The maximum number of iterations
-    tol : float (default = 1e-3)
+    tol : float, default=1e-3
         The tolerance for the optimization: if the updates are smaller than
         tol, the optimization code checks the dual gap for optimality and
         continues until it is smaller than tol.
-    solver : {'cd', 'qn'} (default='cd')
+    solver : {'cd', 'qn'}, default='cd'
         Choose an algorithm:
 
           * 'cd' - coordinate descent
           * 'qn' - quasi-newton
 
         You may find the alternative 'qn' algorithm is faster when the number
-        of features is sufficiently large, but the sample size is small.
-    selection : {'cyclic', 'random'} (default='cyclic')
-        If set to 'random', a random coefficient is updated every iteration
-        rather than looping over features sequentially by default.
-        This (setting to 'random') often leads to significantly faster
-        convergence especially when tol is higher than 1e-4.
+        of features is sufficiently large but the sample size is small.
+    selection : {'cyclic', 'random'}, default='cyclic'
+        How selections are made when `solver="cd"`. If set to 'random', a
+        random coefficient is updated every iteration rather than looping over
+        features sequentially by default. This (setting to 'random') often
+        leads to significantly faster convergence especially when tol is higher
+        than 1e-4.
+    normalize : boolean, default=False
+        If True, the predictors in X will be normalized by dividing by the
+        column-wise standard deviation.
+        If False, no scaling will be done.
+        Note: this is in contrast to sklearn's deprecated `normalize` flag,
+        which divides by the column-wise L2 norm; but this is the same as if
+        using sklearn's StandardScaler.
     handle : cuml.Handle
         Specifies the cuml.handle that holds internal CUDA state for
         computations in this model. Most importantly, this specifies the CUDA
@@ -129,13 +89,39 @@ class ElasticNet(
     ----------
     coef_ : array, shape (n_features)
         The estimated coefficients for the linear regression model.
-    intercept_ : array
-        The independent term. If `fit_intercept` is False, will be 0.
+    intercept_ : float
+        The independent term; 0 if `fit_intercept` is False.
 
     Notes
     -----
     For additional docs, see `scikitlearn's ElasticNet
     <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html>`_.
+
+    Examples
+    --------
+    >>> import cupy as cp
+    >>> import cudf
+    >>> from cuml.linear_model import ElasticNet
+    >>> enet = ElasticNet(alpha = 0.1, l1_ratio=0.5, solver='qn')
+    >>> X = cudf.DataFrame()
+    >>> X['col1'] = cp.array([0, 1, 2], dtype = cp.float32)
+    >>> X['col2'] = cp.array([0, 1, 2], dtype = cp.float32)
+    >>> y = cudf.Series(cp.array([0.0, 1.0, 2.0], dtype = cp.float32) )
+    >>> result_enet = enet.fit(X, y)
+    >>> print(result_enet.coef_)
+    0    0.445...
+    1    0.445...
+    dtype: float32
+    >>> print(result_enet.intercept_)
+    0.108433...
+    >>> X_new = cudf.DataFrame()
+    >>> X_new['col1'] = cp.array([3,2], dtype = cp.float32)
+    >>> X_new['col2'] = cp.array([5,5], dtype = cp.float32)
+    >>> preds = result_enet.predict(X_new)
+    >>> print(preds)
+    0    3.674...
+    1    3.228...
+    dtype: float32
     """
 
     coef_ = CumlArrayDescriptor(order="F")
@@ -144,15 +130,16 @@ class ElasticNet(
 
     @classmethod
     def _get_param_names(cls):
-        return super()._get_param_names() + [
+        return [
+            *super()._get_param_names(),
             "alpha",
             "l1_ratio",
             "fit_intercept",
-            "normalize",
             "max_iter",
             "tol",
             "solver",
             "selection",
+            "normalize",
         ]
 
     @classmethod
@@ -209,156 +196,91 @@ def _attrs_to_cpu(self, model):
 
     def __init__(
         self,
-        *,
         alpha=1.0,
+        *,
         l1_ratio=0.5,
         fit_intercept=True,
-        normalize=False,
         max_iter=1000,
         tol=1e-3,
         solver="cd",
         selection="cyclic",
+        normalize=False,
         handle=None,
         output_type=None,
         verbose=False,
     ):
-        """
-        Initializes the elastic-net regression class.
-
-        Parameters
-        ----------
-        alpha : float or double.
-        l1_ratio : float or double.
-        fit_intercept: boolean.
-        normalize: boolean.
-        max_iter: int
-        tol: float or double.
-        solver: str, 'cd' or 'qn'
-        selection : str, 'cyclic', or 'random'
-
-        For additional docs, see `scikitlearn's ElasticNet
-        <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html>`_.
-        """
-
-        # Hard-code verbosity as CoordinateDescent does not have verbosity
         super().__init__(
             handle=handle, verbose=verbose, output_type=output_type
         )
 
-        self._check_alpha(alpha)
-        self._check_l1_ratio(l1_ratio)
-
         self.alpha = alpha
         self.l1_ratio = l1_ratio
         self.fit_intercept = fit_intercept
-        self.solver = solver
-        self.normalize = normalize
         self.max_iter = max_iter
         self.tol = tol
-        self.solver_model = None
-        if selection in ["cyclic", "random"]:
-            self.selection = selection
-        else:
-            msg = "selection {!r} is not supported"
-            raise TypeError(msg.format(selection))
+        self.solver = solver
+        self.selection = selection
+        self.normalize = normalize
 
-        self.intercept_value = 0.0
+    @generate_docstring()
+    def fit(
+        self, X, y, sample_weight=None, *, convert_dtype=True
+    ) -> "ElasticNet":
+        """
+        Fit the model with X and y.
 
-        shuffle = False
-        if self.selection == "random":
-            shuffle = True
+        """
+        if self.alpha < 0.0:
+            raise ValueError(f"Expected alpha >= 0, got {self.alpha}")
+        if self.selection not in ["cyclic", "random"]:
+            raise ValueError(f"selection {self.selection!r} is not supported")
+        if self.l1_ratio < 0.0 or self.l1_ratio > 1.0:
+            raise ValueError(
+                f"Expected 0.0 <= l1_ratio <= 1.0, got {self.l1_ratio}"
+            )
 
-        if solver == "qn":
-            pams = signature(self.__init__).parameters
-            if pams["selection"].default != selection:
-                warn(
-                    "Parameter 'selection' has no effect "
-                    "when 'qn' solver is used."
-                )
-            if pams["normalize"].default != normalize:
-                warn(
-                    "Parameter 'normalize' has no effect "
-                    "when 'qn' solver is used."
+        if self.solver == "qn":
+            if self.normalize:
+                raise ValueError(
+                    "`normalize=True` is not supported with `solver='qn'`"
                 )
 
-            self.solver_model = QN(
+            solver = QN(
+                handle=self.handle,
+                verbose=self.verbose,
+                output_type=self.output_type,
                 fit_intercept=self.fit_intercept,
                 l1_strength=self.alpha * self.l1_ratio,
                 l2_strength=self.alpha * (1.0 - self.l1_ratio),
-                max_iter=self.max_iter,
-                handle=self.handle,
                 loss="l2",
-                tol=self.tol,
                 penalty_normalized=False,
-                verbose=self.verbose,
+                max_iter=self.max_iter,
+                tol=self.tol,
+            ).fit(
+                X, y, sample_weight=sample_weight, convert_dtype=convert_dtype
             )
-        elif solver == "cd":
-            self.solver_model = CD(
-                fit_intercept=self.fit_intercept,
-                normalize=self.normalize,
+
+            coef = CumlArray(data=solver.coef_.to_output("cupy").flatten())
+            intercept = solver.intercept_.item()
+        elif self.solver == "cd":
+            coef, intercept = fit_coordinate_descent(
+                X,
+                y,
+                sample_weight=sample_weight,
+                convert_dtype=convert_dtype,
                 alpha=self.alpha,
+                fit_intercept=self.fit_intercept,
                 l1_ratio=self.l1_ratio,
-                shuffle=shuffle,
+                normalize=self.normalize,
+                shuffle=self.selection == "random",
                 max_iter=self.max_iter,
-                handle=self.handle,
                 tol=self.tol,
+                handle=self.handle,
             )
         else:
-            raise TypeError(f"solver {solver} is not supported")
-
-    def _check_alpha(self, alpha):
-        if alpha <= 0.0:
-            msg = "alpha value has to be positive"
-            raise ValueError(msg.format(alpha))
-
-    def _check_l1_ratio(self, l1_ratio):
-        if l1_ratio < 0.0 or l1_ratio > 1.0:
-            msg = "l1_ratio value has to be between 0.0 and 1.0"
-            raise ValueError(msg.format(l1_ratio))
-
-    @generate_docstring()
-    def fit(
-        self, X, y, sample_weight=None, *, convert_dtype=True
-    ) -> "ElasticNet":
-        """
-        Fit the model with X and y.
-
-        """
-        X_m, _, self.n_features_in_, self.dtype = input_to_cuml_array(X)
-        y_m, _, _, _ = input_to_cuml_array(y)
-        if hasattr(X_m, "index"):
-            self.feature_names_in_ = X_m.index
-
-        # Check for multi-target regression
-        if (self.solver in ["cd", "qn"]) and y_m.ndim > 1 and y_m.shape[1] > 1:
-            raise ValueError(
-                f"The {self.solver} solver does not support "
-                "multi-target regression."
-            )
-
-        self.solver_model.fit(
-            X_m, y_m, convert_dtype=convert_dtype, sample_weight=sample_weight
-        )
-        if isinstance(self.solver_model, QN):
-            coefs = self.solver_model.coef_
-            self.coef_ = CumlArray(
-                data=coefs,
-                index=coefs._index,
-                dtype=coefs.dtype,
-                order=coefs.order,
-                shape=(coefs.shape[1],),
-            )
-            self.intercept_ = self.solver_model.intercept_.item()
-        else:
-            self.coef_ = self.solver_model.coef_
-            self.intercept_ = self.solver_model.intercept_
+            raise ValueError(f"solver {self.solver} is not supported")
 
-        return self
+        self.coef_ = coef
+        self.intercept_ = intercept
 
-    def set_params(self, **params):
-        super().set_params(**params)
-        if "selection" in params:
-            params.pop("selection")
-            params["shuffle"] = self.selection == "random"
-        self.solver_model.set_params(**params)
         return self
diff --git a/python/cuml/cuml/linear_model/lasso.py b/python/cuml/cuml/linear_model/lasso.py
index 3f7a2df8db..845e2ed187 100644
--- a/python/cuml/cuml/linear_model/lasso.py
+++ b/python/cuml/cuml/linear_model/lasso.py
@@ -7,47 +7,10 @@
 
 
 class Lasso(ElasticNet):
-
     """
-    Lasso extends LinearRegression by providing L1 regularization on the
-    coefficients when predicting response y with a linear combination of the
-    predictors in X. It can zero some of the coefficients for feature
-    selection and improves the conditioning of the problem.
-
-    cuML's Lasso can take array-like objects, either in host as
-    NumPy arrays or in device (as Numba or `__cuda_array_interface__`
-    compliant), in addition to cuDF objects. It uses coordinate descent to fit
-    a linear model.
-
-    Examples
-    --------
+    Linear Model trained with L1 prior as regularizer (aka the Lasso).
 
-    .. code-block:: python
-
-        >>> import numpy as np
-        >>> import cudf
-        >>> from cuml.linear_model import Lasso
-        >>> ls = Lasso(alpha = 0.1, solver='qn')
-        >>> X = cudf.DataFrame()
-        >>> X['col1'] = np.array([0, 1, 2], dtype = np.float32)
-        >>> X['col2'] = np.array([0, 1, 2], dtype = np.float32)
-        >>> y = cudf.Series( np.array([0.0, 1.0, 2.0], dtype = np.float32) )
-        >>> result_lasso = ls.fit(X, y)
-        >>> print(result_lasso.coef_)
-        0   0.425
-        1   0.425
-        dtype: float32
-        >>> print(result_lasso.intercept_)
-        0.150000...
-
-        >>> X_new = cudf.DataFrame()
-        >>> X_new['col1'] = np.array([3,2], dtype = np.float32)
-        >>> X_new['col2'] = np.array([5,5], dtype = np.float32)
-        >>> preds = result_lasso.predict(X_new)
-        >>> print(preds)
-        0   3.549997
-        1   3.124997
-        dtype: float32
+    This is the same as ``ElasticNet(l1_ratio=1.0)`` (no L2 penalty).
 
     Parameters
     ----------
@@ -61,13 +24,6 @@ class Lasso(ElasticNet):
     fit_intercept : boolean (default = True)
         If True, Lasso tries to correct for the global mean of y.
         If False, the model expects that you have centered the data.
-    normalize : boolean (default = False)
-        If True, the predictors in X will be normalized by dividing by the
-        column-wise standard deviation.
-        If False, no scaling will be done.
-        Note: this is in contrast to sklearn's deprecated `normalize` flag,
-        which divides by the column-wise L2 norm; but this is the same as if
-        using sklearn's StandardScaler.
     max_iter : int (default = 1000)
         The maximum number of iterations
     tol : float (default = 1e-3)
@@ -87,6 +43,13 @@ class Lasso(ElasticNet):
         rather than looping over features sequentially by default.
         This (setting to 'random') often leads to significantly faster
         convergence especially when tol is higher than 1e-4.
+    normalize : boolean (default = False)
+        If True, the predictors in X will be normalized by dividing by the
+        column-wise standard deviation.
+        If False, no scaling will be done.
+        Note: this is in contrast to sklearn's deprecated `normalize` flag,
+        which divides by the column-wise L2 norm; but this is the same as if
+        using sklearn's StandardScaler.
     handle : cuml.Handle
         Specifies the cuml.handle that holds internal CUDA state for
         computations in this model. Most importantly, this specifies the CUDA
@@ -115,6 +78,33 @@ class Lasso(ElasticNet):
     -----
     For additional docs, see `scikitlearn's Lasso
     <https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html>`_.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import cudf
+    >>> from cuml.linear_model import Lasso
+    >>> ls = Lasso(alpha = 0.1, solver='qn')
+    >>> X = cudf.DataFrame()
+    >>> X['col1'] = np.array([0, 1, 2], dtype = np.float32)
+    >>> X['col2'] = np.array([0, 1, 2], dtype = np.float32)
+    >>> y = cudf.Series( np.array([0.0, 1.0, 2.0], dtype = np.float32) )
+    >>> result_lasso = ls.fit(X, y)
+    >>> print(result_lasso.coef_)
+    0   0.425
+    1   0.425
+    dtype: float32
+    >>> print(result_lasso.intercept_)
+    0.150000...
+
+    >>> X_new = cudf.DataFrame()
+    >>> X_new['col1'] = np.array([3,2], dtype = np.float32)
+    >>> X_new['col2'] = np.array([5,5], dtype = np.float32)
+    >>> preds = result_lasso.predict(X_new)
+    >>> print(preds)
+    0   3.549997
+    1   3.124997
+    dtype: float32
     """
 
     _cpu_class_path = "sklearn.linear_model.Lasso"
@@ -136,28 +126,28 @@ def _params_to_cpu(self):
 
     def __init__(
         self,
-        *,
         alpha=1.0,
+        *,
         fit_intercept=True,
-        normalize=False,
         max_iter=1000,
         tol=1e-3,
         solver="cd",
         selection="cyclic",
+        normalize=False,
         handle=None,
         output_type=None,
         verbose=False,
     ):
         # Lasso is just a special case of ElasticNet
         super().__init__(
-            l1_ratio=1.0,
             alpha=alpha,
+            l1_ratio=1.0,
             fit_intercept=fit_intercept,
-            normalize=normalize,
             max_iter=max_iter,
             tol=tol,
             solver=solver,
             selection=selection,
+            normalize=normalize,
             handle=handle,
             output_type=output_type,
             verbose=verbose,
diff --git a/python/cuml/cuml/solvers/cd.pyx b/python/cuml/cuml/solvers/cd.pyx
index f3ec7b9ab8..cb15781288 100644
--- a/python/cuml/cuml/solvers/cd.pyx
+++ b/python/cuml/cuml/solvers/cd.pyx
@@ -1,13 +1,8 @@
 # SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION.
 # SPDX-License-Identifier: Apache-2.0
 #
-
-# distutils: language = c++
-
 import numpy as np
 
-from libc.stdint cimport uintptr_t
-
 from cuml.common import CumlArray
 from cuml.common.array_descriptor import CumlArrayDescriptor
 from cuml.common.doc_utils import generate_docstring
@@ -15,12 +10,14 @@ from cuml.internals.base import Base
 from cuml.internals.input_utils import input_to_cuml_array
 from cuml.internals.mixins import FMajorInputTagMixin
 
+from libc.stdint cimport uintptr_t
 from libcpp cimport bool
 from pylibraft.common.handle cimport handle_t
 
+__all__ = ("fit_coordinate_descent", "CD")
 
-cdef extern from "cuml/solvers/solver.hpp" namespace "ML::Solver" nogil:
 
+cdef extern from "cuml/solvers/solver.hpp" namespace "ML::Solver" nogil:
     cdef void cdFit(handle_t& handle,
                     float *input,
                     int n_rows,
@@ -74,8 +71,149 @@ cdef extern from "cuml/solvers/solver.hpp" namespace "ML::Solver" nogil:
                         int loss) except +
 
 
-class CD(Base,
-         FMajorInputTagMixin):
+def fit_coordinate_descent(
+    X,
+    y,
+    sample_weight=None,
+    *,
+    convert_dtype=True,
+    loss="squared_loss",
+    double alpha=0.0001,
+    double l1_ratio=0.15,
+    bool fit_intercept=True,
+    bool normalize=False,
+    int max_iter=1000,
+    double tol=1e-3,
+    bool shuffle=True,
+    handle=None,
+):
+    """Fit a linear model using coordinate descent.
+
+    Parameters
+    ----------
+    X : array-like, shape=(n_samples, n_features)
+        The training data.
+    y : array-like, shape=(n_samples,)
+        The target values.
+    sample_weight : None or array-like, shape=(n_samples,)
+        The sample weights.
+    convert_dtype : bool, default=True
+        When set to True, will convert array inputs to be of the proper dtypes.
+    handle : cuml.Handle or None
+        CUDA handle to run on; a new one is created when None.
+    **kwargs
+        Other keyword arguments are the ``CD`` hyperparameters (see ``CD``).
+
+    Returns
+    -------
+    coef : CumlArray, shape=(n_features,)
+        The fit coefficients
+    intercept : float
+        The fit intercept, or 0 if `fit_intercept=False`
+    """
+    # Process and validate parameters
+    if loss != "squared_loss":
+        raise ValueError(f"{loss=!r} is not supported")
+
+    if alpha < 0.0:
+        raise ValueError(f"Expected alpha >= 0, got {alpha}")
+
+    if handle is None:
+        # `handle=None` is the declared default; build one on demand
+        from pylibraft.common.handle import Handle
+        handle = Handle()
+
+    # Process and validate input arrays
+    cdef int n_rows, n_cols
+    X, n_rows, n_cols, _ = input_to_cuml_array(
+        X,
+        convert_to_dtype=(np.float32 if convert_dtype else None),
+        check_dtype=[np.float32, np.float64],
+    )
+
+    if n_rows < 2:
+        raise ValueError(
+            f"Found array with {n_rows} sample(s) (shape={X.shape}) while a "
+            f"minimum of 2 is required."
+        )
+    if n_cols < 1:
+        raise ValueError(
+            f"Found array with {n_cols} feature(s) (shape={X.shape}) while "
+            f"a minimum of 1 is required."
+        )
+
+    # y and sample_weight must match X in dtype and row count
+    vector_opts = dict(
+        check_dtype=X.dtype,
+        convert_to_dtype=(X.dtype if convert_dtype else None),
+        check_rows=n_rows,
+        check_cols=1,
+    )
+    y = input_to_cuml_array(y, **vector_opts).array
+    if sample_weight is not None:
+        sample_weight = input_to_cuml_array(sample_weight, **vector_opts).array
+
+    # Allocate outputs
+    coef = CumlArray.zeros(n_cols, dtype=X.dtype)
+
+    cdef uintptr_t X_ptr = X.ptr
+    cdef uintptr_t y_ptr = y.ptr
+    cdef uintptr_t sample_weight_ptr = (
+        0 if sample_weight is None else sample_weight.ptr
+    )
+    cdef uintptr_t coef_ptr = coef.ptr
+
+    cdef float intercept_f32
+    cdef double intercept_f64
+    cdef handle_t* handle_ = <handle_t*><size_t>handle.getHandle()
+    cdef bool is_float32 = X.dtype == np.float32
+
+    # Perform fit
+    with nogil:
+        if is_float32:
+            cdFit(
+                handle_[0],
+                <float*>X_ptr,
+                n_rows,
+                n_cols,
+                <float*>y_ptr,
+                <float*>coef_ptr,
+                &intercept_f32,
+                fit_intercept,
+                normalize,
+                max_iter,
+                0,
+                <float>alpha,
+                <float>l1_ratio,
+                shuffle,
+                <float>tol,
+                <float*>sample_weight_ptr
+            )
+        else:
+            cdFit(
+                handle_[0],
+                <double*>X_ptr,
+                n_rows,
+                n_cols,
+                <double*>y_ptr,
+                <double*>coef_ptr,
+                &intercept_f64,
+                fit_intercept,
+                normalize,
+                max_iter,
+                0,
+                alpha,
+                l1_ratio,
+                shuffle,
+                tol,
+                <double*>sample_weight_ptr
+            )
+    handle.sync()
+
+    return coef, intercept_f32 if is_float32 else intercept_f64
+
+
+class CD(Base, FMajorInputTagMixin):
     """
     Coordinate Descent (CD) is a very common optimization algorithm that
     minimizes along coordinate directions to find the minimum of a function.
@@ -84,40 +222,6 @@ class CD(Base,
     input dataset.algorithm The CD algorithm currently works with linear
     regression and ridge, lasso, and elastic-net penalties.
 
-    Examples
-    --------
-    .. code-block:: python
-
-        >>> import cupy as cp
-        >>> import cudf
-        >>> from cuml.solvers import CD as cumlCD
-
-        >>> cd = cumlCD(alpha=0.0)
-
-        >>> X = cudf.DataFrame()
-        >>> X['col1'] = cp.array([1,1,2,2], dtype=cp.float32)
-        >>> X['col2'] = cp.array([1,2,2,3], dtype=cp.float32)
-
-        >>> y = cudf.Series(cp.array([6.0, 8.0, 9.0, 11.0], dtype=cp.float32))
-
-        >>> cd.fit(X,y)
-        CD()
-        >>> print(cd.coef_) # doctest: +SKIP
-        0 1.001...
-        1 1.998...
-        dtype: float32
-        >>> print(cd.intercept_) # doctest: +SKIP
-        3.00...
-        >>> X_new = cudf.DataFrame()
-        >>> X_new['col1'] = cp.array([3,2], dtype=cp.float32)
-        >>> X_new['col2'] = cp.array([5,5], dtype=cp.float32)
-
-        >>> preds = cd.predict(X_new)
-        >>> print(preds) # doctest: +SKIP
-        0 15.997...
-        1 14.995...
-        dtype: float32
-
     Parameters
     ----------
     loss : 'squared_loss'
@@ -165,21 +269,59 @@ class CD(Base,
         (`cuml.global_settings.output_type`) will be used. See
         :ref:`output-data-type-configuration` for more info.
 
+    Examples
+    --------
+    >>> import cupy as cp
+    >>> import cudf
+    >>> from cuml.solvers import CD
+
+    >>> cd = CD(alpha=0.0)
+
+    >>> X = cudf.DataFrame()
+    >>> X['col1'] = cp.array([1,1,2,2], dtype=cp.float32)
+    >>> X['col2'] = cp.array([1,2,2,3], dtype=cp.float32)
+
+    >>> y = cudf.Series(cp.array([6.0, 8.0, 9.0, 11.0], dtype=cp.float32))
+
+    >>> cd.fit(X,y)
+    CD()
+    >>> print(cd.coef_) # doctest: +SKIP
+    0 1.001...
+    1 1.998...
+    dtype: float32
+    >>> print(cd.intercept_) # doctest: +SKIP
+    3.00...
+    >>> X_new = cudf.DataFrame()
+    >>> X_new['col1'] = cp.array([3,2], dtype=cp.float32)
+    >>> X_new['col2'] = cp.array([5,5], dtype=cp.float32)
+
+    >>> preds = cd.predict(X_new)
+    >>> print(preds) # doctest: +SKIP
+    0 15.997...
+    1 14.995...
+    dtype: float32
     """
-
     coef_ = CumlArrayDescriptor()
 
+    @classmethod
+    def _get_param_names(cls):
+        """Return the parameter names of this estimator.
+
+        The inherited names come first, followed by the coordinate
+        descent hyperparameters accepted by ``__init__``.
+        """
+        cd_names = (
+            "loss", "alpha", "l1_ratio", "fit_intercept",
+            "normalize", "max_iter", "tol", "shuffle",
+        )
+        inherited = super()._get_param_names()
+        return [*inherited, *cd_names]
+
     def __init__(self, *, loss='squared_loss', alpha=0.0001, l1_ratio=0.15,
                  fit_intercept=True, normalize=False, max_iter=1000, tol=1e-3,
                  shuffle=True, handle=None, output_type=None, verbose=False):
 
-        if loss not in ['squared_loss']:
-            msg = "loss {!r} is not supported"
-            raise NotImplementedError(msg.format(loss))
-
-        super().__init__(handle=handle,
-                         verbose=verbose,
-                         output_type=output_type)
+        super().__init__(handle=handle, verbose=verbose, output_type=output_type)
 
         self.loss = loss
         self.alpha = alpha
@@ -189,107 +331,29 @@ class CD(Base,
         self.max_iter = max_iter
         self.tol = tol
         self.shuffle = shuffle
-        self.intercept_value = 0.0
-        self.coef_ = None
-        self.intercept_ = None
-
-    def _check_alpha(self, alpha):
-        for el in alpha:
-            if el <= 0.0:
-                msg = "alpha values have to be positive"
-                raise TypeError(msg.format(alpha))
-
-    def _get_loss_int(self):
-        return {
-            'squared_loss': 0,
-        }[self.loss]
 
     @generate_docstring()
     def fit(self, X, y, convert_dtype=True, sample_weight=None) -> "CD":
         """
         Fit the model with X and y.
-
         """
-        cdef uintptr_t sample_weight_ptr
-        X_m, n_rows, self.n_cols, self.dtype = \
-            input_to_cuml_array(X,
-                                convert_to_dtype=(np.float32 if convert_dtype
-                                                  else None),
-                                check_dtype=[np.float32, np.float64])
-
-        y_m, *_ = \
-            input_to_cuml_array(y, check_dtype=self.dtype,
-                                convert_to_dtype=(self.dtype if convert_dtype
-                                                  else None),
-                                check_rows=n_rows, check_cols=1)
-
-        if sample_weight is not None:
-            sample_weight_m, _, _, _ = \
-                input_to_cuml_array(sample_weight, check_dtype=self.dtype,
-                                    convert_to_dtype=(
-                                        self.dtype if convert_dtype else None),
-                                    check_rows=n_rows, check_cols=1)
-            sample_weight_ptr = sample_weight_m.ptr
-        else:
-            sample_weight_ptr = 0
-
-        cdef uintptr_t _X_ptr = X_m.ptr
-        cdef uintptr_t _y_ptr = y_m.ptr
-
-        self.n_alpha = 1
-
-        self.coef_ = CumlArray.zeros(self.n_cols, dtype=self.dtype)
-        cdef uintptr_t _coef_ptr = self.coef_.ptr
-
-        cdef float _c_intercept_f32
-        cdef double _c_intercept2_f64
-
-        cdef handle_t* handle_ = <handle_t*><size_t>self.handle.getHandle()
-
-        if self.dtype == np.float32:
-            cdFit(handle_[0],
-                  <float*>_X_ptr,
-                  <int>n_rows,
-                  <int>self.n_cols,
-                  <float*>_y_ptr,
-                  <float*>_coef_ptr,
-                  <float*>&_c_intercept_f32,
-                  <bool>self.fit_intercept,
-                  <bool>self.normalize,
-                  <int>self.max_iter,
-                  <int>self._get_loss_int(),
-                  <float>self.alpha,
-                  <float>self.l1_ratio,
-                  <bool>self.shuffle,
-                  <float>self.tol,
-                  <float*>sample_weight_ptr)
-
-            self.intercept_ = _c_intercept_f32
-        else:
-            cdFit(handle_[0],
-                  <double*>_X_ptr,
-                  <int>n_rows,
-                  <int>self.n_cols,
-                  <double*>_y_ptr,
-                  <double*>_coef_ptr,
-                  <double*>&_c_intercept2_f64,
-                  <bool>self.fit_intercept,
-                  <bool>self.normalize,
-                  <int>self.max_iter,
-                  <int>self._get_loss_int(),
-                  <double>self.alpha,
-                  <double>self.l1_ratio,
-                  <bool>self.shuffle,
-                  <double>self.tol,
-                  <double*>sample_weight_ptr)
-
-            self.intercept_ = _c_intercept2_f64
-
-        self.handle.sync()
-        del X_m
-        del y_m
-        if sample_weight is not None:
-            del sample_weight_m
+        coef, intercept = fit_coordinate_descent(
+            X,
+            y,
+            sample_weight=sample_weight,
+            convert_dtype=convert_dtype,
+            loss=self.loss,
+            alpha=self.alpha,
+            l1_ratio=self.l1_ratio,
+            fit_intercept=self.fit_intercept,
+            normalize=self.normalize,
+            max_iter=self.max_iter,
+            tol=self.tol,
+            shuffle=self.shuffle,
+            handle=self.handle,
+        )
+        self.coef_ = coef
+        self.intercept_ = intercept
 
         return self
 
@@ -300,57 +364,47 @@ class CD(Base,
     def predict(self, X, convert_dtype=True) -> CumlArray:
         """
         Predicts the y for X.
-
         """
-        X_m, n_rows, _n_cols, _ = \
-            input_to_cuml_array(X, check_dtype=self.dtype,
-                                convert_to_dtype=(self.dtype if convert_dtype
-                                                  else None),
-                                check_cols=self.n_cols)
-
-        cdef uintptr_t _X_ptr = X_m.ptr
-        cdef uintptr_t _coef_ptr = self.coef_.ptr
-
-        preds = CumlArray.zeros(n_rows, dtype=self.dtype,
-                                index=X_m.index)
-        cdef uintptr_t _preds_ptr = preds.ptr
-
+        cdef int n_rows, n_cols
+        X, n_rows, n_cols, _ = input_to_cuml_array(
+            X,
+            check_dtype=self.coef_.dtype,
+            convert_to_dtype=(self.coef_.dtype if convert_dtype else None),
+            check_cols=self.coef_.shape[0],
+        )
+
+        preds = CumlArray.zeros(n_rows, dtype=self.coef_.dtype, index=X.index)
+
+        cdef uintptr_t X_ptr = X.ptr
+        cdef uintptr_t preds_ptr = preds.ptr
+        cdef uintptr_t coef_ptr = self.coef_.ptr
+        cdef double intercept = self.intercept_
         cdef handle_t* handle_ = <handle_t*><size_t>self.handle.getHandle()
-
-        if self.dtype == np.float32:
-            cdPredict(handle_[0],
-                      <float*>_X_ptr,
-                      <int>n_rows,
-                      <int>_n_cols,
-                      <float*>_coef_ptr,
-                      <float>self.intercept_,
-                      <float*>_preds_ptr,
-                      <int>self._get_loss_int())
-        else:
-            cdPredict(handle_[0],
-                      <double*>_X_ptr,
-                      <int>n_rows,
-                      <int>_n_cols,
-                      <double*>_coef_ptr,
-                      <double>self.intercept_,
-                      <double*>_preds_ptr,
-                      <int>self._get_loss_int())
-
+        cdef bool is_float32 = self.coef_.dtype == np.float32
+
+        with nogil:
+            if is_float32:
+                cdPredict(
+                    handle_[0],
+                    <float*>X_ptr,
+                    n_rows,
+                    n_cols,
+                    <float*>coef_ptr,
+                    <float>intercept,
+                    <float*>preds_ptr,
+                    0,
+                )
+            else:
+                cdPredict(
+                    handle_[0],
+                    <double*>X_ptr,
+                    n_rows,
+                    n_cols,
+                    <double*>coef_ptr,
+                    intercept,
+                    <double*>preds_ptr,
+                    0,
+                )
         self.handle.sync()
 
-        del X_m
-
         return preds
-
-    @classmethod
-    def _get_param_names(cls):
-        return super()._get_param_names() + [
-            "loss",
-            "alpha",
-            "l1_ratio",
-            "fit_intercept",
-            "normalize",
-            "max_iter",
-            "tol",
-            "shuffle",
-        ]
diff --git a/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml b/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml
index b433525b29..f7e42f956a 100644
--- a/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml
+++ b/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml
@@ -261,12 +261,6 @@
   - "sklearn.linear_model.tests.test_ridge::test_ridge_sample_weight_consistency[42-saga-wide-csr_matrix-False]"
   - "sklearn.linear_model.tests.test_ridge::test_ridgecv_sample_weight"
   - "sklearn.linear_model.tests.test_sag::test_step_size_alpha_error"
-  - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_enet_toy_explicit_sparse_input[lil_array]"
-  - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_enet_toy_explicit_sparse_input[lil_matrix]"
-  - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_enet_toy_list_input[csc_array-False]"
-  - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_enet_toy_list_input[csc_array-True]"
-  - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_enet_toy_list_input[csc_matrix-False]"
-  - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_enet_toy_list_input[csc_matrix-True]"
   - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_same_multiple_output_sparse_dense[coo_array]"
   - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_same_multiple_output_sparse_dense[coo_matrix]"
   - "sklearn.linear_model.tests.test_sparse_coordinate_descent::test_sparse_dense_equality[csc_array-False-6-24-False-ElasticNet]"
@@ -1301,10 +1295,8 @@
   - "sklearn.tests.test_common::test_estimators[DBSCAN()-check_sample_weights_not_an_array]"
   - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_complex_data]"
   - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_dtype_object]"
-  - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_estimators_empty_data_messages]"
   - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_estimators_nan_inf]"
   - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_fit1d]"
-  - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_fit2d_1sample]"
   - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_fit2d_predict1d]"
   - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_non_transformer_estimators_n_iter]"
   - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_regressor_data_not_an_array]"
@@ -1340,10 +1332,8 @@
   - "sklearn.tests.test_common::test_estimators[KNeighborsRegressor()-check_supervised_y_no_nan]"
   - "sklearn.tests.test_common::test_estimators[Lasso()-check_complex_data]"
   - "sklearn.tests.test_common::test_estimators[Lasso()-check_dtype_object]"
-  - "sklearn.tests.test_common::test_estimators[Lasso()-check_estimators_empty_data_messages]"
   - "sklearn.tests.test_common::test_estimators[Lasso()-check_estimators_nan_inf]"
   - "sklearn.tests.test_common::test_estimators[Lasso()-check_fit1d]"
-  - "sklearn.tests.test_common::test_estimators[Lasso()-check_fit2d_1sample]"
   - "sklearn.tests.test_common::test_estimators[Lasso()-check_fit2d_predict1d]"
   - "sklearn.tests.test_common::test_estimators[Lasso()-check_non_transformer_estimators_n_iter]"
   - "sklearn.tests.test_common::test_estimators[Lasso()-check_regressor_data_not_an_array]"
diff --git a/python/cuml/tests/test_linear_model.py b/python/cuml/tests/test_linear_model.py
index 1ed0e5e1bf..df29502c0c 100644
--- a/python/cuml/tests/test_linear_model.py
+++ b/python/cuml/tests/test_linear_model.py
@@ -1220,10 +1220,7 @@ def test_elasticnet_model(datatype, solver, nrows, column_info, ntargets):
     cuelastic = cuElasticNet(alpha=0.1, l1_ratio=0.5, solver=solver)
 
     if ntargets > 1:
-        with pytest.raises(
-            ValueError,
-            match="The .* solver does not support multi-target regression.",
-        ):
+        with pytest.raises(ValueError, match="Expected 1 columns"):
             cuelastic.fit(X_train, y_train)
         return
 
diff --git a/python/cuml/tests/test_sklearn_compatibility.py b/python/cuml/tests/test_sklearn_compatibility.py
index 6584a53259..7bd4d77186 100644
--- a/python/cuml/tests/test_sklearn_compatibility.py
+++ b/python/cuml/tests/test_sklearn_compatibility.py
@@ -542,7 +542,6 @@
     Lasso: {
         "check_estimator_tags_renamed": "No support for modern tags infrastructure",
         "check_no_attributes_set_in_init": "Lasso sets attributes during init",
-        "check_dont_overwrite_parameters": "Lasso overwrites parameters during fit",
         "check_estimators_unfitted": "Lasso does not raise NotFittedError before fit",
         "check_do_not_raise_errors_in_init_or_set_params": "Lasso raises errors in init or set_params",
         "check_n_features_in_after_fitting": "Lasso does not check n_features_in consistency",
@@ -550,7 +549,6 @@
         "check_sample_weights_list": "Lasso does not handle list sample weights",
         "check_complex_data": "Lasso does not handle complex data",
         "check_dtype_object": "Lasso does not handle object dtype",
-        "check_estimators_empty_data_messages": "Lasso does not handle empty data",
         "check_estimators_nan_inf": "Lasso does not check for NaN and inf",
         "check_estimator_sparse_tag": "Lasso does not support sparse data",
         "check_estimator_sparse_array": "Lasso does not handle sparse arrays gracefully",
@@ -563,8 +561,6 @@
         "check_supervised_y_no_nan": "Lasso does not check for NaN in y",
         "check_non_transformer_estimators_n_iter": "Lasso does not have n_iter_ attribute",
         "check_parameters_default_constructible": "Lasso parameters are mutated on init",
-        "check_fit2d_1sample": "Lasso does not handle single sample",
-        "check_set_params": "Lasso does not handle set_params properly",
         "check_fit1d": "Lasso does not raise ValueError for 1D input",
         "check_fit2d_predict1d": "Lasso does not handle 1D prediction input gracefully",
         "check_requires_y_none": "Lasso does not handle y=None",
@@ -572,7 +568,6 @@
     ElasticNet: {
         "check_estimator_tags_renamed": "No support for modern tags infrastructure",
         "check_no_attributes_set_in_init": "ElasticNet sets attributes during init",
-        "check_dont_overwrite_parameters": "ElasticNet overwrites parameters during fit",
         "check_estimators_unfitted": "ElasticNet does not raise NotFittedError before fit",
         "check_do_not_raise_errors_in_init_or_set_params": "ElasticNet raises errors in init or set_params",
         "check_n_features_in_after_fitting": "ElasticNet does not check n_features_in consistency",
@@ -580,7 +575,6 @@
         "check_sample_weights_list": "ElasticNet does not handle list sample weights",
         "check_complex_data": "ElasticNet does not handle complex data",
         "check_dtype_object": "ElasticNet does not handle object dtype",
-        "check_estimators_empty_data_messages": "ElasticNet does not handle empty data",
         "check_estimators_nan_inf": "ElasticNet does not check for NaN and inf",
         "check_estimator_sparse_tag": "ElasticNet does not support sparse data",
         "check_estimator_sparse_array": "ElasticNet does not handle sparse arrays gracefully",
@@ -593,8 +587,6 @@
         "check_supervised_y_no_nan": "ElasticNet does not check for NaN in y",
         "check_non_transformer_estimators_n_iter": "ElasticNet does not have n_iter_ attribute",
         "check_parameters_default_constructible": "ElasticNet parameters are mutated on init",
-        "check_fit2d_1sample": "ElasticNet does not handle single sample",
-        "check_set_params": "ElasticNet does not handle set_params properly",
         "check_fit1d": "ElasticNet does not raise ValueError for 1D input",
         "check_fit2d_predict1d": "ElasticNet does not handle 1D prediction input gracefully",
         "check_requires_y_none": "ElasticNet does not handle y=None",
diff --git a/python/cuml/tests/test_solver_attributes.py b/python/cuml/tests/test_solver_attributes.py
index eb86773864..9d1bddd89a 100644
--- a/python/cuml/tests/test_solver_attributes.py
+++ b/python/cuml/tests/test_solver_attributes.py
@@ -75,14 +75,9 @@ def test_elastic_net_attributes():
     clf.fit(X, y)
 
     attrs = [
-        "dtype",
-        "solver_model",
+        "n_features_in_",
         "coef_",
         "intercept_",
-        "l1_ratio",
-        "alpha",
-        "max_iter",
-        "fit_intercept",
     ]
     for attr in attrs:
         assert hasattr(clf, attr)
@@ -94,12 +89,9 @@ def test_lasso_attributes():
     clf.fit(X, y)
 
     attrs = [
-        "dtype",
-        "solver_model",
+        "n_features_in_",
         "coef_",
         "intercept_",
-        "solver_model",
-        "l1_ratio",
     ]
     for attr in attrs:
         assert hasattr(clf, attr)