Merged

Commits (30 total; the diff below shows changes from 13 commits)
71142e8
Support StandardScaler in cuml.accel.
csadorf Feb 3, 2026
215505d
partial_fit is not supported
csadorf Feb 4, 2026
2f448a1
fixup n_sample_seen sync to cpu
csadorf Feb 4, 2026
960d9f9
Fix error message
csadorf Feb 4, 2026
9fa8622
do not support complext input data
csadorf Feb 4, 2026
35dd633
Do not support object dtype
csadorf Feb 4, 2026
308b5e2
xfail check_transformer_data_not_an_array test
csadorf Feb 4, 2026
1c39bb6
Document support and limitations.
csadorf Feb 4, 2026
a1065d9
convert n_samples_seen_ when synced from cpu
csadorf Feb 4, 2026
e0ad693
document lack of support for sample_weight argument
csadorf Feb 4, 2026
a3944a7
update xfail list
csadorf Feb 4, 2026
c78da5e
address sklearn upstream failures
csadorf Feb 4, 2026
944b631
remove passing tests from xfail list
csadorf Feb 4, 2026
71613fb
more precise handling of sparse matrix related limitations
csadorf Feb 5, 2026
6e5b8c8
refactor preprocessing.py to deduplicate
csadorf Feb 5, 2026
3a8ae02
Merge remote-tracking branch 'origin/main' into add-cuml.accel-suppor…
csadorf Feb 5, 2026
f32090e
be more precise about detection and conversion of list-like inputs
csadorf Feb 5, 2026
10f6821
Handle sample_weight parameter
csadorf Feb 5, 2026
661012b
Do not sync for get_feature_names_out
csadorf Feb 5, 2026
5ce80ca
Merge remote-tracking branch 'origin/main' into add-cuml.accel-suppor…
csadorf Feb 5, 2026
e7a52e4
fall back for float16
csadorf Feb 5, 2026
4884e3e
fine-tune support matrix and limitation docs
csadorf Feb 5, 2026
51bc0fd
Merge remote-tracking branch 'origin/main' into add-cuml.accel-suppor…
csadorf Feb 5, 2026
b0542c1
fixup xfail list version conditions
csadorf Feb 5, 2026
13869b0
Merge branch 'main' into add-cuml.accel-support-for-standardscaler
csadorf Feb 6, 2026
68db908
Restore erroneously removed entries from xfail list.
csadorf Feb 6, 2026
b8635bc
use scikit-learn approach for is-array-like check
csadorf Feb 9, 2026
c14de81
simplify sample_weight kwargs handling
csadorf Feb 9, 2026
f58ad9a
Merge remote-tracking branch 'origin/main' into add-cuml.accel-suppor…
csadorf Feb 9, 2026
2740a65
Revert change to xfail list from b8635bc83076c45c07631862dbd5d5c147e5…
csadorf Feb 9, 2026
1 change: 1 addition & 0 deletions docs/source/cuml-accel/faq.rst
@@ -68,6 +68,7 @@ the following estimators are mostly or entirely accelerated when run with
* ``sklearn.neighbors.KNeighborsClassifier``
* ``sklearn.neighbors.KNeighborsRegressor``
* ``sklearn.neighbors.KernelDensity``
* ``sklearn.preprocessing.StandardScaler``
* ``sklearn.preprocessing.TargetEncoder``
* ``sklearn.svm.SVC``
* ``sklearn.svm.SVR``
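As an illustrative aside (not part of this diff), a minimal sketch of how the newly listed ``StandardScaler`` is exercised under the accelerator: the script is launched via ``python -m cuml.accel`` (or ``%load_ext cuml.accel`` in a notebook), and the unmodified scikit-learn import is then dispatched to the GPU implementation where supported. The file name and data below are made up.

# scale_example.py -- hypothetical script; run as: python -m cuml.accel scale_example.py
import numpy as np
from sklearn.preprocessing import StandardScaler  # unmodified sklearn import

X = np.random.default_rng(0).normal(loc=3.0, scale=2.0, size=(1000, 5))

scaler = StandardScaler().fit(X)   # handled on GPU when supported
X_scaled = scaler.transform(X)

print(X_scaled.mean(axis=0))       # close to 0 per feature
print(X_scaled.std(axis=0))        # close to 1 per feature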
9 changes: 9 additions & 0 deletions docs/source/cuml-accel/limitations.rst
@@ -402,6 +402,15 @@ Additional notes:
sklearn.preprocessing
---------------------

StandardScaler
^^^^^^^^^^^^^^

``StandardScaler`` will fall back to CPU in the following cases:

- If ``partial_fit`` is called (incremental learning not supported on GPU).
- If ``sample_weight`` is provided (weighted statistics not supported on GPU).
- If ``X`` has object or complex dtype (``complex64``, ``complex128``).

TargetEncoder
^^^^^^^^^^^^^

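The fallback conditions documented above can be sketched with scikit-learn calls that are valid on CPU; under ``cuml.accel`` these are expected to be handled by the CPU path rather than the GPU implementation. The data is illustrative.

import numpy as np
from sklearn.preprocessing import StandardScaler

X = np.random.default_rng(0).normal(size=(50, 3))

# sample_weight is not supported on GPU -> CPU fallback
StandardScaler().fit(X, sample_weight=np.ones(50))

# partial_fit (incremental learning) is not supported on GPU -> CPU fallback
StandardScaler().partial_fit(X)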
45 changes: 44 additions & 1 deletion python/cuml/cuml/_thirdparty/sklearn/preprocessing/_data.py
@@ -5,7 +5,7 @@
# SPDX-FileCopyrightText: Eric Martin <eric@ericmart.in>
# SPDX-FileCopyrightText: Giorgio Patrini <giorgio.patrini@anu.edu.au>
# SPDX-FileCopyrightText: Eric Chang <ericchang2017@u.northwestern.edu>
# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause

# Original authors from Scikit-Learn:
@@ -40,6 +40,7 @@
SparseInputTagMixin,
StatelessTagMixin,
)
from cuml.internals.interop import InteropMixin, to_cpu, to_gpu

from ....common.array_descriptor import CumlArrayDescriptor
from ....internals.array import CumlArray
@@ -519,6 +520,7 @@ def minmax_scale(X, feature_range=(0, 1), *, axis=0, copy=True):

class StandardScaler(TransformerMixin,
BaseEstimator,
InteropMixin,
AllowNaNTagMixin,
SparseInputTagMixin):
"""Standardize features by removing the mean and scaling to unit variance
@@ -658,6 +660,47 @@ def _get_param_names(cls):
"copy"
]

# InteropMixin requirements
_cpu_class_path = "sklearn.preprocessing.StandardScaler"

@classmethod
def _params_from_cpu(cls, model):
"""Convert sklearn StandardScaler hyperparameters to cuML format."""
return {
"copy": model.copy,
"with_mean": model.with_mean,
"with_std": model.with_std,
}

def _params_to_cpu(self):
"""Convert cuML StandardScaler hyperparameters to sklearn format."""
return {
"copy": self.copy,
"with_mean": self.with_mean,
"with_std": self.with_std,
}

def _attrs_from_cpu(self, model):
"""Convert sklearn StandardScaler fitted attributes to cuML format."""
attrs = {
"mean_": to_gpu(mean) if (mean := getattr(model, "mean_", None)) is not None else None,
"var_": to_gpu(var) if (var := getattr(model, "var_", None)) is not None else None,
"scale_": to_gpu(scale) if (scale := getattr(model, "scale_", None)) is not None else None,
"n_samples_seen_": to_gpu(nss) if (nss := getattr(model, "n_samples_seen_", None)) is not None else None,
}
return {**attrs, **super()._attrs_from_cpu(model)}

def _attrs_to_cpu(self, model):
"""Convert cuML StandardScaler fitted attributes to sklearn format."""

attrs = {
"mean_": to_cpu(mean) if (mean := getattr(self, "mean_", None)) is not None else None,
"var_": to_cpu(var) if (var := getattr(self, "var_", None)) is not None else None,
"scale_": to_cpu(scale) if (scale := getattr(self, "scale_", None)) is not None else None,
"n_samples_seen_": None if (nss := getattr(self, "n_samples_seen_", None)) is None else cpu_np.int64(nss) if cpu_np.isscalar(nss) else to_cpu(nss),
}
return {**attrs, **super()._attrs_to_cpu(model)}

@reflect(reset=True)
def fit(self, X, y=None) -> "StandardScaler":
"""Compute the mean and std to be used for later scaling.
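For reference, a CPU-only sketch of the fitted state that the ``_attrs_from_cpu``/``_attrs_to_cpu`` hooks above shuttle between libraries; it uses plain scikit-learn and numpy and makes no cuML calls.

import numpy as np
from sklearn.preprocessing import StandardScaler

X = np.random.default_rng(0).normal(size=(100, 4))
cpu_model = StandardScaler().fit(X)

print(cpu_model.mean_.shape)      # (4,) per-feature mean
print(cpu_model.var_.shape)       # (4,) per-feature variance
print(cpu_model.scale_.shape)     # (4,) per-feature scale factor
print(cpu_model.n_samples_seen_)  # 100, a scalar here

scikit-learn stores ``n_samples_seen_`` as a plain integer when no values are missing and as a per-feature array otherwise, which is why ``_attrs_to_cpu`` special-cases ``cpu_np.isscalar(nss)``.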
@@ -1,5 +1,5 @@
# SPDX-FileCopyrightText: Gael Varoquaux <gael.varoquaux@normalesup.org>
# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: BSD-3-Clause

# Original authors from Scikit-Learn:
@@ -71,7 +71,7 @@ def _check_n_features(self, X, reset):
)
if n_features != self.n_features_in_:
raise ValueError(
'X has {} features, but this {} is expecting {} features '
'X has {} features, but {} is expecting {} features '
'as input.'.format(n_features, self.__class__.__name__,
self.n_features_in_)
)
61 changes: 60 additions & 1 deletion python/cuml/cuml/accel/_wrappers/sklearn/preprocessing.py
@@ -4,12 +4,71 @@
#

import numpy as np
from scipy import sparse as sp_sparse

import cuml.preprocessing
from cuml.accel.estimator_proxy import ProxyBase
from cuml.internals.interop import UnsupportedOnGPU

__all__ = ("TargetEncoder",)
__all__ = ("StandardScaler", "TargetEncoder")


class StandardScaler(ProxyBase):
_gpu_class = cuml.preprocessing.StandardScaler

def _gpu_fit(self, X, y=None, **kwargs):
if "sample_weight" in kwargs:
raise UnsupportedOnGPU("sample_weight parameter not supported")
# Reject complex and object dtypes
if hasattr(X, "dtype"):
if np.issubdtype(X.dtype, np.complexfloating):
raise UnsupportedOnGPU("Complex data types not supported")
if X.dtype == np.object_:
raise UnsupportedOnGPU("Object dtype not supported")
# Check for sparse matrices with unsupported properties
if sp_sparse.issparse(X):
# cupy sparse doesn't support int64 dtype
if X.dtype == np.int64:
raise UnsupportedOnGPU(
"Sparse matrices with int64 dtype not supported on GPU "
"(cupy sparse only supports float32, float64, complex64, complex128, bool)"
)
# cuML only supports CSR/CSC formats, not COO, DOK, LIL, etc.
if X.format not in ("csr", "csc"):
raise UnsupportedOnGPU(
f"Sparse matrix format '{X.format}' not supported on GPU "
"(only CSR and CSC formats are supported)"
)
return self._gpu.fit(X, y, **kwargs)

def _gpu_fit_transform(self, X, y=None, **kwargs):
if "sample_weight" in kwargs:
raise UnsupportedOnGPU("sample_weight parameter not supported")
# Reject complex and object dtypes
if hasattr(X, "dtype"):
if np.issubdtype(X.dtype, np.complexfloating):
raise UnsupportedOnGPU("Complex data types not supported")
if X.dtype == np.object_:
raise UnsupportedOnGPU("Object dtype not supported")
# Check for sparse matrices with unsupported properties
if sp_sparse.issparse(X):
# cupy sparse doesn't support int64 dtype
if X.dtype == np.int64:
raise UnsupportedOnGPU(
"Sparse matrices with int64 dtype not supported on GPU "
"(cupy sparse only supports float32, float64, complex64, complex128, bool)"
)
# cuML only supports CSR/CSC formats, not COO, DOK, LIL, etc.
if X.format not in ("csr", "csc"):
raise UnsupportedOnGPU(
f"Sparse matrix format '{X.format}' not supported on GPU "
"(only CSR and CSC formats are supported)"
)
return self._gpu.fit_transform(X, y, **kwargs)

def _gpu_partial_fit(self, X, y=None, **kwargs):
"""partial_fit not supported on GPU - always fall back to CPU."""
raise UnsupportedOnGPU("partial_fit not supported on GPU")


def _check_unsupported_inputs(X, y, cpu_model):
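The dtype and sparse-format checks are duplicated verbatim between ``_gpu_fit`` and ``_gpu_fit_transform``; a possible follow-up (not part of this diff) is to factor them into a shared helper. The helper name below is hypothetical.

import numpy as np
from scipy import sparse as sp_sparse

from cuml.internals.interop import UnsupportedOnGPU


def _raise_if_unsupported_on_gpu(X, kwargs):
    # Hypothetical shared helper mirroring the checks in _gpu_fit / _gpu_fit_transform.
    if "sample_weight" in kwargs:
        raise UnsupportedOnGPU("sample_weight parameter not supported")
    if hasattr(X, "dtype"):
        if np.issubdtype(X.dtype, np.complexfloating):
            raise UnsupportedOnGPU("Complex data types not supported")
        if X.dtype == np.object_:
            raise UnsupportedOnGPU("Object dtype not supported")
    if sp_sparse.issparse(X):
        if X.dtype == np.int64:
            raise UnsupportedOnGPU(
                "Sparse matrices with int64 dtype not supported on GPU "
                "(cupy sparse only supports float32, float64, complex64, complex128, bool)"
            )
        if X.format not in ("csr", "csc"):
            raise UnsupportedOnGPU(
                f"Sparse matrix format '{X.format}' not supported on GPU "
                "(only CSR and CSC formats are supported)"
            )

With such a helper, both ``_gpu_fit`` and ``_gpu_fit_transform`` would reduce to the check followed by delegation to ``self._gpu``.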
27 changes: 25 additions & 2 deletions python/cuml/cuml/thirdparty_adapters/adapters.py
@@ -1,5 +1,5 @@
#
# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
#
import cudf
@@ -227,6 +227,18 @@ def check_array(
array_converted : object
The converted and validated array.
"""
# Convert list-like inputs to numpy arrays early for compatibility with cuml.accel
# This ensures downstream functions can safely access .dtype and other array attributes
from cuml.accel import enabled as cuml_accel_enabled

if (
cuml_accel_enabled()
and not isinstance(array, (np.ndarray, pd.DataFrame, cudf.DataFrame))
and not (cpu_sparse.issparse(array) or gpu_sparse.issparse(array))
):
# Check if it's array-like (list, tuple, etc.) by checking for common sequence methods
if hasattr(array, "__len__") and hasattr(array, "__getitem__"):
array = np.asarray(array)

if dtype == "numeric":
dtype = numeric_types
@@ -250,7 +262,18 @@
hasshape = hasattr(array, "shape")
if ensure_2d and hasshape:
if len(array.shape) != 2:
raise ValueError("Not 2D")
if len(array.shape) == 1:
raise ValueError(
f"Expected 2D array, got 1D array instead:\narray={array!r}.\n"
"Reshape your data either using array.reshape(-1, 1) if "
"your data has a single feature or array.reshape(1, -1) "
"if it contains a single sample."
)
else:
raise ValueError(
f"Expected 2D array, got {len(array.shape)}D array instead:\n"
f"array shape: {array.shape}.\n"
)

if not allow_nd and hasshape:
if len(array.shape) > 2:
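A short CPU-side sketch of the two behaviors this hunk targets: list-like inputs are converted to numpy arrays before any ``.dtype`` access, and 1D inputs get the scikit-learn-style reshape hint. Plain numpy is used here to mimic the logic rather than calling ``check_array`` itself.

import numpy as np

# List-like input: an early conversion gives downstream code a .dtype to inspect.
X = np.asarray([[0.0, 1.0], [2.0, 3.0]])
print(X.dtype, X.shape)          # float64 (2, 2)

# 1D input: the new error message points to the usual remedies.
x = np.array([1.0, 2.0, 3.0])
print(x.reshape(-1, 1).shape)    # (3, 1) -- a single feature
print(x.reshape(1, -1).shape)    # (1, 3) -- a single sample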
30 changes: 29 additions & 1 deletion python/cuml/cuml_accel_tests/test_basic_estimators.py
@@ -17,7 +17,7 @@
KNeighborsRegressor,
NearestNeighbors,
)
from sklearn.preprocessing import TargetEncoder
from sklearn.preprocessing import StandardScaler, TargetEncoder


def test_kmeans():
@@ -44,6 +44,34 @@ def test_truncated_svd():
svd.transform(X)


def test_standard_scaler():
import numpy as np

X, _ = make_blobs(n_samples=100, centers=3, random_state=42)
scaler = StandardScaler().fit(X)

# Check fitted attributes exist
assert hasattr(scaler, "mean_")
assert hasattr(scaler, "var_")
assert hasattr(scaler, "scale_")
assert scaler.mean_.shape == (X.shape[1],)
assert scaler.var_.shape == (X.shape[1],)
assert scaler.scale_.shape == (X.shape[1],)

# Transform and check shape
X_transformed = scaler.transform(X)
assert X_transformed.shape == X.shape

# Check that transformed data has mean ≈ 0 and std ≈ 1
assert np.allclose(X_transformed.mean(axis=0), 0, atol=1e-7)
assert np.allclose(X_transformed.std(axis=0), 1, atol=1e-7)

# Check inverse transform
X_inverse = scaler.inverse_transform(X_transformed)
assert X_inverse.shape == X.shape
assert np.allclose(X_inverse, X, atol=1e-6)


def test_linear_regression():
X, y = make_regression(
n_samples=100, n_features=20, noise=0.1, random_state=42