Skip to content
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
71142e8
Support StandardScaler in cuml.accel.
csadorf Feb 3, 2026
215505d
partial_fit is not supported
csadorf Feb 4, 2026
2f448a1
fixup n_sample_seen sync to cpu
csadorf Feb 4, 2026
960d9f9
Fix error message
csadorf Feb 4, 2026
9fa8622
do not support complex input data
csadorf Feb 4, 2026
35dd633
Do not support object dtype
csadorf Feb 4, 2026
308b5e2
xfail check_transformer_data_not_an_array test
csadorf Feb 4, 2026
1c39bb6
Document support and limitations.
csadorf Feb 4, 2026
a1065d9
convert n_samples_seen_ when synced from cpu
csadorf Feb 4, 2026
e0ad693
document lack of support for sample_weight argument
csadorf Feb 4, 2026
a3944a7
update xfail list
csadorf Feb 4, 2026
c78da5e
address sklearn upstream failures
csadorf Feb 4, 2026
944b631
remove passing tests from xfail list
csadorf Feb 4, 2026
71613fb
more precise handling of sparse matrix related limitations
csadorf Feb 5, 2026
6e5b8c8
refactor preprocessing.py to deduplicate
csadorf Feb 5, 2026
3a8ae02
Merge remote-tracking branch 'origin/main' into add-cuml.accel-suppor…
csadorf Feb 5, 2026
f32090e
be more precise about detection and conversion of list-like inputs
csadorf Feb 5, 2026
10f6821
Handle sample_weight parameter
csadorf Feb 5, 2026
661012b
Do not sync for get_feature_names_out
csadorf Feb 5, 2026
5ce80ca
Merge remote-tracking branch 'origin/main' into add-cuml.accel-suppor…
csadorf Feb 5, 2026
e7a52e4
fall back for float16
csadorf Feb 5, 2026
4884e3e
fine-tune support matrix and limitation docs
csadorf Feb 5, 2026
51bc0fd
Merge remote-tracking branch 'origin/main' into add-cuml.accel-suppor…
csadorf Feb 5, 2026
b0542c1
fixup xfail list version conditions
csadorf Feb 5, 2026
13869b0
Merge branch 'main' into add-cuml.accel-support-for-standardscaler
csadorf Feb 6, 2026
68db908
Restore erroneously removed entries from xfail list.
csadorf Feb 6, 2026
b8635bc
use scikit-learn approach for is-array-like check
csadorf Feb 9, 2026
c14de81
simplify sample_weight kwargs handling
csadorf Feb 9, 2026
f58ad9a
Merge remote-tracking branch 'origin/main' into add-cuml.accel-suppor…
csadorf Feb 9, 2026
2740a65
Revert change to xfail list from b8635bc83076c45c07631862dbd5d5c147e5…
csadorf Feb 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/cuml-accel/faq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ the following estimators are mostly or entirely accelerated when run with
* ``sklearn.neighbors.KNeighborsClassifier``
* ``sklearn.neighbors.KNeighborsRegressor``
* ``sklearn.neighbors.KernelDensity``
* ``sklearn.preprocessing.StandardScaler``
* ``sklearn.preprocessing.TargetEncoder``
* ``sklearn.svm.SVC``
* ``sklearn.svm.SVR``
Expand Down
10 changes: 10 additions & 0 deletions docs/source/cuml-accel/limitations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,16 @@ Additional notes:
sklearn.preprocessing
---------------------

StandardScaler
^^^^^^^^^^^^^^

``StandardScaler`` will fall back to CPU in the following cases:

- If ``partial_fit`` is called (incremental learning not supported on GPU).
- If ``sample_weight`` is provided (weighted statistics not supported on GPU).
- If ``X`` has object dtype, half precision (float16) dtype, or complex dtype (``complex64``, ``complex128``).
- If ``X`` is a sparse matrix with integer dtype or in a format other than CSR or CSC.

Comment thread
coderabbitai[bot] marked this conversation as resolved.
TargetEncoder
^^^^^^^^^^^^^

Expand Down
45 changes: 44 additions & 1 deletion python/cuml/cuml/_thirdparty/sklearn/preprocessing/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# SPDX-FileCopyrightText: Eric Martin <eric@ericmart.in>
# SPDX-FileCopyrightText: Giorgio Patrini <giorgio.patrini@anu.edu.au>
# SPDX-FileCopyrightText: Eric Chang <ericchang2017@u.northwestern.edu>
# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause

# Original authors from Scikit-Learn:
Expand Down Expand Up @@ -40,6 +40,7 @@
SparseInputTagMixin,
StatelessTagMixin,
)
from cuml.internals.interop import InteropMixin, to_cpu, to_gpu

from ....common.array_descriptor import CumlArrayDescriptor
from ....internals.array import CumlArray
Expand Down Expand Up @@ -519,6 +520,7 @@ def minmax_scale(X, feature_range=(0, 1), *, axis=0, copy=True):

class StandardScaler(TransformerMixin,
BaseEstimator,
InteropMixin,
AllowNaNTagMixin,
SparseInputTagMixin):
"""Standardize features by removing the mean and scaling to unit variance
Expand Down Expand Up @@ -658,6 +660,47 @@ def _get_param_names(cls):
"copy"
]

# InteropMixin requirements
_cpu_class_path = "sklearn.preprocessing.StandardScaler"

@classmethod
def _params_from_cpu(cls, model):
"""Convert sklearn StandardScaler hyperparameters to cuML format."""
return {
"copy": model.copy,
"with_mean": model.with_mean,
"with_std": model.with_std,
}

def _params_to_cpu(self):
"""Convert cuML StandardScaler hyperparameters to sklearn format."""
return {
"copy": self.copy,
"with_mean": self.with_mean,
"with_std": self.with_std,
}

def _attrs_from_cpu(self, model):
    """Convert sklearn StandardScaler fitted attributes to cuML format."""
    # Each fitted attribute is optional (absent before fit, or None when the
    # corresponding statistic is disabled); only present values move to GPU.
    attrs = {}
    for name in ("mean_", "var_", "scale_", "n_samples_seen_"):
        value = getattr(model, name, None)
        attrs[name] = None if value is None else to_gpu(value)
    return {**attrs, **super()._attrs_from_cpu(model)}

def _attrs_to_cpu(self, model):
    """Convert cuML StandardScaler fitted attributes to sklearn format."""
    attrs = {}
    for name in ("mean_", "var_", "scale_"):
        value = getattr(self, name, None)
        attrs[name] = None if value is None else to_cpu(value)

    # n_samples_seen_ may be a scalar (dense fit) or an array (per-feature
    # counts); sklearn stores the scalar case as a numpy integer.
    seen = getattr(self, "n_samples_seen_", None)
    if seen is None:
        attrs["n_samples_seen_"] = None
    elif cpu_np.isscalar(seen):
        attrs["n_samples_seen_"] = cpu_np.int64(seen)
    else:
        attrs["n_samples_seen_"] = to_cpu(seen)

    return {**attrs, **super()._attrs_to_cpu(model)}

@reflect(reset=True)
def fit(self, X, y=None) -> "StandardScaler":
"""Compute the mean and std to be used for later scaling.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def _check_n_features(self, X, reset):
)
if n_features != self.n_features_in_:
raise ValueError(
'X has {} features, but this {} is expecting {} features '
'X has {} features, but {} is expecting {} features '
'as input.'.format(n_features, self.__class__.__name__,
self.n_features_in_)
)
Expand Down
66 changes: 65 additions & 1 deletion python/cuml/cuml/accel/_wrappers/sklearn/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,77 @@
# SPDX-License-Identifier: Apache-2.0
#

import cupyx.scipy.sparse as cupy_sparse
import numpy as np
from scipy import sparse as sp_sparse

import cuml.preprocessing
from cuml.accel.estimator_proxy import ProxyBase
from cuml.internals.interop import UnsupportedOnGPU

__all__ = ("TargetEncoder",)
__all__ = ("StandardScaler", "TargetEncoder")


def _check_standardscaler_unsupported_inputs(X, **kwargs):
    """Check if inputs are supported by cuML's StandardScaler on GPU.

    Raises UnsupportedOnGPU for unsupported cases to trigger CPU fallback.

    Parameters
    ----------
    X : array-like or sparse matrix
        Input data to validate.
    **kwargs
        Fit keyword arguments; the presence of ``sample_weight`` (even
        ``None``) triggers CPU fallback, since the GPU ``fit`` signature
        does not accept it.

    Raises
    ------
    UnsupportedOnGPU
        If the input cannot be handled by the GPU implementation.
    """
    if "sample_weight" in kwargs:
        raise UnsupportedOnGPU("sample_weight parameter not supported")

    # Reject complex, object, and float16 dtypes
    if hasattr(X, "dtype"):
        if np.issubdtype(X.dtype, np.complexfloating):
            raise UnsupportedOnGPU("Complex data types not supported")
        if X.dtype == np.object_:
            raise UnsupportedOnGPU("Object dtype not supported")
        if X.dtype == np.float16:
            raise UnsupportedOnGPU(
                "float16 dtype not supported on GPU (output would not preserve dtype)"
            )

    # Check for sparse matrices with unsupported properties
    if sp_sparse.issparse(X):
        # cupy sparse supports only float32/float64/complex64/complex128/bool,
        # so any integer dtype (not just int64) must fall back to CPU. This
        # matches the documented limitation ("sparse matrix with integer dtype").
        if np.issubdtype(X.dtype, np.integer):
            raise UnsupportedOnGPU(
                "Sparse matrices with integer dtype not supported on GPU "
                "(cupy sparse only supports float32, float64, complex64, complex128, bool)"
            )
        # cuML's StandardScaler algorithm only supports CSR/CSC formats.
        if X.format not in ("csr", "csc"):
            raise UnsupportedOnGPU(
                f"Sparse matrix format '{X.format}' not supported on GPU "
                "(only CSR and CSC formats are supported)"
            )
    elif cupy_sparse.issparse(X):
        # Check CuPy sparse matrices (not caught by scipy.sparse.issparse)
        # cuML's StandardScaler algorithm only supports CSR/CSC formats.
        if X.format not in ("csr", "csc"):
            raise UnsupportedOnGPU(
                f"CuPy sparse matrix format '{X.format}' not supported "
                "(only CSR and CSC formats are supported)"
            )
Comment thread
coderabbitai[bot] marked this conversation as resolved.


class StandardScaler(ProxyBase):
    # Dispatches sklearn's StandardScaler to cuML's GPU implementation,
    # falling back to CPU for inputs the GPU path cannot handle.

    _gpu_class = cuml.preprocessing.StandardScaler

    def _gpu_fit(self, X, y=None, sample_weight=None):
        # Forward sample_weight to the validation helper only when set, so
        # an omitted weight does not trigger an unnecessary CPU fallback.
        extra = {}
        if sample_weight is not None:
            extra["sample_weight"] = sample_weight
        _check_standardscaler_unsupported_inputs(X, **extra)
        return self._gpu.fit(X, y)

    def _gpu_fit_transform(self, X, y=None, **fit_params):
        _check_standardscaler_unsupported_inputs(X, **fit_params)
        return self._gpu.fit_transform(X, y, **fit_params)

    def _gpu_partial_fit(self, X, y=None, sample_weight=None):
        """partial_fit has no GPU implementation; always fall back to CPU."""
        raise UnsupportedOnGPU("partial_fit not supported on GPU")


def _check_unsupported_inputs(X, y, cpu_model):
Expand Down
14 changes: 11 additions & 3 deletions python/cuml/cuml/accel/estimator_proxy.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

Expand All @@ -8,7 +8,11 @@

import sklearn
from packaging.version import Version
from sklearn.base import BaseEstimator, ClassNamePrefixFeaturesOutMixin
from sklearn.base import (
BaseEstimator,
ClassNamePrefixFeaturesOutMixin,
OneToOneFeatureMixin,
)
from sklearn.utils._set_output import _wrap_data_with_container

from cuml.accel import profilers
Expand Down Expand Up @@ -335,12 +339,16 @@ def _gpu_set_output(self, *, transform=None):

def _gpu_get_feature_names_out(self, input_features=None):
    # `get_feature_names_out` usually needs no fitted CPU attributes. Detect
    # the common sklearn mixin implementations and run them directly against
    # the GPU estimator, avoiding an unnecessary device -> host transfer.
    cpu_method = self._cpu_class.get_feature_names_out

    # ClassNamePrefixFeaturesOutMixin only reads `_n_features_out`;
    # OneToOneFeatureMixin reads `n_features_in_` (and, when present,
    # `feature_names_in_`). cuML models set `n_features_in_` on fit.
    reusable = (
        ClassNamePrefixFeaturesOutMixin.get_feature_names_out,
        OneToOneFeatureMixin.get_feature_names_out,
    )
    if cpu_method in reusable:
        return cpu_method(self._gpu, input_features=input_features)

    # Anything else may require fitted CPU state, so fall back to CPU.
    raise UnsupportedOnGPU
Expand Down
37 changes: 35 additions & 2 deletions python/cuml/cuml/thirdparty_adapters/adapters.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
#
import cudf
Expand All @@ -9,6 +9,7 @@
import pandas as pd
from scipy import sparse as cpu_sparse

from cuml.accel import enabled as cuml_accel_enabled
from cuml.internals.input_utils import input_to_cupy_array

numeric_types = [
Expand Down Expand Up @@ -227,6 +228,27 @@ def check_array(
array_converted : object
The converted and validated array.
"""
# Convert list-like inputs to numpy arrays early for compatibility with cuml.accel
# This ensures downstream functions can safely access .dtype and other array attributes
if (
cuml_accel_enabled()
and not isinstance(
array,
(
np.ndarray,
pd.DataFrame,
cudf.DataFrame,
pd.Series,
cudf.Series,
cp.ndarray,
),
)
and not (cpu_sparse.issparse(array) or gpu_sparse.issparse(array))
and not hasattr(array, "__cuda_array_interface__")
):
# Check if it's array-like (list, tuple, etc.) by checking for common sequence methods
if hasattr(array, "__len__") and hasattr(array, "__getitem__"):
Comment thread
csadorf marked this conversation as resolved.
Outdated
array = np.asarray(array)
Comment thread
coderabbitai[bot] marked this conversation as resolved.

if dtype == "numeric":
dtype = numeric_types
Expand All @@ -250,7 +272,18 @@ def check_array(
hasshape = hasattr(array, "shape")
if ensure_2d and hasshape:
if len(array.shape) != 2:
raise ValueError("Not 2D")
if len(array.shape) == 1:
raise ValueError(
f"Expected 2D array, got 1D array instead:\narray={array!r}.\n"
"Reshape your data either using array.reshape(-1, 1) if "
"your data has a single feature or array.reshape(1, -1) "
"if it contains a single sample."
)
else:
raise ValueError(
f"Expected 2D array, got {len(array.shape)}D array instead:\n"
f"array shape: {array.shape}.\n"
)

if not allow_nd and hasshape:
if len(array.shape) > 2:
Expand Down
30 changes: 29 additions & 1 deletion python/cuml/cuml_accel_tests/test_basic_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
KNeighborsRegressor,
NearestNeighbors,
)
from sklearn.preprocessing import TargetEncoder
from sklearn.preprocessing import StandardScaler, TargetEncoder


def test_kmeans():
Expand All @@ -44,6 +44,34 @@ def test_truncated_svd():
svd.transform(X)


def test_standard_scaler():
    import numpy as np

    X, _ = make_blobs(n_samples=100, centers=3, random_state=42)
    scaler = StandardScaler().fit(X)

    # Fitted statistics must exist, one entry per feature.
    n_features = X.shape[1]
    for attr in ("mean_", "var_", "scale_"):
        assert hasattr(scaler, attr)
        assert getattr(scaler, attr).shape == (n_features,)

    # Standardized output keeps the input shape and has per-feature
    # mean close to 0 and standard deviation close to 1.
    X_scaled = scaler.transform(X)
    assert X_scaled.shape == X.shape
    assert np.allclose(X_scaled.mean(axis=0), 0, atol=1e-7)
    assert np.allclose(X_scaled.std(axis=0), 1, atol=1e-7)

    # Round-tripping through inverse_transform recovers the original data.
    X_restored = scaler.inverse_transform(X_scaled)
    assert X_restored.shape == X.shape
    assert np.allclose(X_restored, X, atol=1e-6)


def test_linear_regression():
X, y = make_regression(
n_samples=100, n_features=20, noise=0.1, random_state=42
Expand Down
Loading
Loading