Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# SPDX-FileCopyrightText: Andreas Mueller
# SPDX-FileCopyrightText: Joris Van den Bossche
# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: BSD-3-Clause

# Original authors from Scikit-Learn:
Expand Down Expand Up @@ -36,6 +36,7 @@
import cuml
from cuml.internals.array_sparse import SparseCumlArray
from cuml.internals.global_settings import _global_settings_data
from cuml.internals.validation import check_is_fitted

from ....thirdparty_adapters import check_array
from ..preprocessing._function_transformer import FunctionTransformer
Expand All @@ -44,7 +45,6 @@
BaseEstimator,
TransformerMixin,
)
from ..utils.validation import check_is_fitted

_ERR_MSG_1DCOLUMN = ("1D data passed to a transformer that expects 2D data. "
"Try to specify the column selection as a list of one "
Expand Down
7 changes: 2 additions & 5 deletions python/cuml/cuml/_thirdparty/sklearn/preprocessing/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
StatelessTagMixin,
)
from cuml.internals.interop import InteropMixin, to_cpu, to_gpu
from cuml.internals.validation import check_is_fitted

from ....common.array_descriptor import CumlArrayDescriptor
from ....internals.array import CumlArray
Expand All @@ -59,11 +60,7 @@
mean_variance_axis,
min_max_axis,
)
from ..utils.validation import (
FLOAT_DTYPES,
check_is_fitted,
check_random_state,
)
from ..utils.validation import FLOAT_DTYPES, check_random_state
Comment thread
jcrist marked this conversation as resolved.

BOUNDS_THRESHOLD = 1e-7

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,14 @@
from cuml.cluster import KMeans
from cuml.internals.mixins import SparseInputTagMixin
from cuml.preprocessing.encoders import OneHotEncoder
from cuml.internals.validation import check_is_fitted

from ....common.array_descriptor import CumlArrayDescriptor
from ....internals.array_sparse import SparseCumlArray
from ....internals.outputs import using_output_type, reflect
from ....thirdparty_adapters import check_array
from ..utils.skl_dependencies import BaseEstimator, TransformerMixin
from ..utils.validation import FLOAT_DTYPES, check_is_fitted
from ..utils.validation import FLOAT_DTYPES


def digitize(x, bins):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# SPDX-FileCopyrightText: Nicolas Tresegnie <[email protected]>
# SPDX-FileCopyrightText: Sergey Feldman <[email protected]>
# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: BSD-3-Clause

# Original authors from Scikit-Learn:
Expand Down Expand Up @@ -28,6 +28,7 @@
SparseInputTagMixin,
StringInputTagMixin,
)
from cuml.internals.validation import check_is_fitted

from ....common.array_descriptor import CumlArrayDescriptor
from ....internals.array_sparse import SparseCumlArray
Expand All @@ -39,7 +40,7 @@
_masked_column_mode,
)
from ..utils.skl_dependencies import BaseEstimator, TransformerMixin
from ..utils.validation import FLOAT_DTYPES, check_is_fitted
from ..utils.validation import FLOAT_DTYPES


def is_scalar_nan(x):
Expand Down
71 changes: 1 addition & 70 deletions python/cuml/cuml/_thirdparty/sklearn/utils/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# SPDX-FileCopyrightText: Alexandre Gramfort
# SPDX-FileCopyrightText: Nicolas Tresegnie
# SPDX-FileCopyrightText: Sylvain Marie
# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: BSD-3-Clause

# Original authors from Scikit-Learn:
Expand All @@ -32,7 +32,6 @@
import cupyx.scipy.sparse as sp
import numpy as np

from ....common.exceptions import NotFittedError
from ....thirdparty_adapters import check_array

FLOAT_DTYPES = (np.float64, np.float32, np.float16)
Expand Down Expand Up @@ -176,74 +175,6 @@ def check_random_state(seed):
' instance' % seed)


def check_is_fitted(estimator, attributes=None, *, msg=None, all_or_any=all):
    """Perform is_fitted validation for estimator.

    Checks if the estimator is fitted by verifying the presence of
    fitted attributes (ending with a trailing underscore) and otherwise
    raises a NotFittedError with the given message.

    This utility is meant to be used internally by estimators themselves,
    typically in their own predict / transform methods.

    Parameters
    ----------
    estimator : estimator instance.
        Estimator instance for which the check is performed.

    attributes : str, list or tuple of str, default=None
        Attribute name(s) given as a string or a list/tuple of strings,
        e.g. ``["coef_", "estimator_", ...]`` or ``"coef_"``.

        If `None`, the estimator's ``__sklearn_is_fitted__()`` result is
        used when that method exists; otherwise `estimator` is considered
        fitted if there exists an instance attribute that ends with an
        underscore and does not start with a double underscore.

    msg : string
        The default error message is, "This %(name)s instance is not fitted
        yet. Call 'fit' with appropriate arguments before using this
        estimator."

        For custom messages, if "%(name)s" is present in the message string,
        it is substituted with the estimator name.

        E.g.: "Estimator, %(name)s, must be fitted before sparsifying".

    all_or_any : callable, {all, any}, default all
        Specify whether all or any of the given attributes must exist.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If `estimator` is a class rather than an instance, or if it has no
        ``fit`` attribute.
    NotFittedError
        If the attributes are not found.
    """
    if isclass(estimator):
        raise TypeError("{} is a class, not an instance.".format(estimator))
    if msg is None:
        msg = ("This %(name)s instance is not fitted yet. Call 'fit' with "
               "appropriate arguments before using this estimator.")

    if not hasattr(estimator, 'fit'):
        # Wrap in a one-element tuple: a bare ``% estimator`` would unpack a
        # tuple-valued `estimator` as the format arguments and either fail
        # with an unrelated formatting error or print only its first item.
        raise TypeError("%s is not an estimator instance." % (estimator,))

    if attributes is not None:
        if not isinstance(attributes, (list, tuple)):
            attributes = [attributes]
        attrs = all_or_any([hasattr(estimator, attr) for attr in attributes])
    elif hasattr(estimator, "__sklearn_is_fitted__"):
        # Let the estimator decide for itself whether it is fitted.
        attrs = estimator.__sklearn_is_fitted__()
    else:
        # Fall back to the convention that fitted state lives in instance
        # attributes with a trailing underscore.
        attrs = [v for v in vars(estimator)
                 if v.endswith("_") and not v.startswith("__")]

    if not attrs:
        raise NotFittedError(msg % {'name': type(estimator).__name__})


def _allclose_dense_sparse(x, y, rtol=1e-7, atol=1e-9):
"""Check allclose for sparse and dense data.

Expand Down
10 changes: 4 additions & 6 deletions python/cuml/cuml/cluster/hdbscan/hdbscan.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ from cuml.internals.interop import (
)
from cuml.internals.mem_type import MemoryType
from cuml.internals.mixins import ClusterMixin, CMajorInputTagMixin
from cuml.internals.validation import check_is_fitted

from cython.operator cimport dereference as deref
from libc.stdint cimport int64_t, uint64_t, uintptr_t
Expand Down Expand Up @@ -906,8 +907,7 @@ class HDBSCAN(Base, InteropMixin, ClusterMixin, CMajorInputTagMixin):
the label of new/unseen points. This data is only useful if you
are intending to use functions from hdbscan.prediction.
"""
if getattr(self, "labels_", None) is None:
raise ValueError("The model is not trained yet (call fit() first).")
check_is_fitted(self)

with cuml.using_output_type("cuml"):
labels = self.labels_
Expand Down Expand Up @@ -1107,10 +1107,8 @@ def _check_clusterer(clusterer):
f"Expected an instance of `HDBSCAN`, got {type(clusterer).__name__}"
)

if getattr(clusterer, "labels_", None) is None:
raise ValueError(
"The clusterer is not fit, please call `clusterer.fit` first"
)
check_is_fitted(clusterer)

cdef _HDBSCANState state = <_HDBSCANState?>clusterer._state

if state.prediction_data == NULL:
Expand Down
6 changes: 5 additions & 1 deletion python/cuml/cuml/cluster/kmeans.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ from cuml.internals.interop import (
)
from cuml.internals.mixins import ClusterMixin, CMajorInputTagMixin
from cuml.internals.outputs import reflect, run_in_internal_context
from cuml.internals.utils import check_random_seed
from cuml.internals.validation import check_is_fitted, check_random_seed

from libc.stdint cimport int64_t, uintptr_t
from libcpp cimport bool
Expand Down Expand Up @@ -633,6 +633,8 @@ class KMeans(Base,
inertia : float
Sum of squared distances of samples to their closest cluster center.
"""
check_is_fitted(self)

dtype = self.cluster_centers_.dtype

X_m, n_rows, _, _ = input_to_cuml_array(
Expand Down Expand Up @@ -694,6 +696,8 @@ class KMeans(Base,
Transform X to a cluster-distance space.

"""
check_is_fitted(self)

dtype = self.cluster_centers_.dtype

X_m = input_to_cuml_array(
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cuml/cluster/spectral_clustering.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ from cuml.common.array_descriptor import CumlArrayDescriptor
from cuml.internals.array import CumlArray
from cuml.internals.base import Base, get_handle
from cuml.internals.input_utils import input_to_cupy_array
from cuml.internals.utils import check_random_seed
from cuml.internals.validation import check_random_seed

from libc.stdint cimport uint64_t, uintptr_t
from libcpp cimport bool
Expand Down
23 changes: 17 additions & 6 deletions python/cuml/cuml/common/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
#
# SPDX-FileCopyrightText: Copyright (c) 2020, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
#
__all__ = ("NotFittedError",) # noqa


class NotFittedError(ValueError, AttributeError):
"""Exception class to raise if estimator is used before fitting.
def __getattr__(name):
if name == "NotFittedError":
import warnings

This class inherits from both ValueError and AttributeError to help with
exception handling and backward compatibility.
"""
from sklearn.exceptions import NotFittedError

warnings.warn(
"`cuml.common.exceptions.NotFittedError` was deprecated in 26.04 "
"and will be removed in 26.06. Please use "
"`sklearn.exceptions.NotFittedError` instead.",
FutureWarning,
stacklevel=2,
)
Comment thread
jcrist marked this conversation as resolved.
return NotFittedError
else:
raise AttributeError(f"module {__name__} has no attribute {name}")
9 changes: 9 additions & 0 deletions python/cuml/cuml/covariance/ledoit_wolf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from cuml.internals.base import Base
from cuml.internals.input_utils import input_to_cupy_array
from cuml.internals.interop import InteropMixin, to_cpu, to_gpu
from cuml.internals.validation import check_is_fitted


def _ledoit_wolf_shrinkage(X, assume_centered=False, block_size=1000):
Expand Down Expand Up @@ -299,6 +300,8 @@ def get_precision(self):
precision_ : ndarray of shape (n_features, n_features)
The precision matrix associated to the current covariance object.
"""
check_is_fitted(self)

if self.store_precision:
return self.precision_
else:
Expand All @@ -324,6 +327,8 @@ def score(self, X_test, y=None) -> float:
log_likelihood : float
Log-likelihood of the data under the fitted Gaussian model.
"""
check_is_fitted(self)

X_arr, _, n_features, _ = input_to_cupy_array(
X_test,
check_dtype=[np.float32, np.float64],
Expand Down Expand Up @@ -367,6 +372,8 @@ def error_norm(
The Mean Squared Error (in the sense of the Frobenius norm)
between `self` and `comp_cov`.
"""
check_is_fitted(self)

comp_cov_arr, _, _, _ = input_to_cupy_array(
comp_cov,
check_dtype=[np.float32, np.float64],
Expand Down Expand Up @@ -408,6 +415,8 @@ def mahalanobis(self, X):
mahalanobis_distances : ndarray of shape (n_samples,)
Squared Mahalanobis distances of the observations.
"""
check_is_fitted(self)

X_arr, _, _, _ = input_to_cupy_array(
X,
check_dtype=[np.float32, np.float64],
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cuml/dask/cluster/kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
)
from cuml.dask.common.input_utils import DistributedDataHandler, concatenate
from cuml.dask.common.utils import wait_and_raise_from_futures
from cuml.internals.utils import check_random_seed
from cuml.internals.validation import check_random_seed


class KMeans(BaseEstimator, DelayedPredictionMixin, DelayedTransformMixin):
Expand Down
4 changes: 2 additions & 2 deletions python/cuml/cuml/dask/preprocessing/LabelEncoder.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
#
from collections.abc import Sequence

from dask_cudf import DataFrame as dcDataFrame
from dask_cudf import Series as dcSeries
from sklearn.exceptions import NotFittedError
Comment thread
jcrist marked this conversation as resolved.
Comment thread
jcrist marked this conversation as resolved.
from toolz import first

from cuml.common.exceptions import NotFittedError
from cuml.dask.common.base import (
BaseEstimator,
DelayedInverseTransformMixin,
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cuml/datasets/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import cuml.internals
import cuml.internals.nvtx as nvtx
from cuml.datasets.utils import _create_rs_generator
from cuml.internals.utils import check_random_seed
from cuml.internals.validation import check_random_seed


def _generate_hypercube(samples, dimensions, random_state):
Expand Down
2 changes: 2 additions & 0 deletions python/cuml/cuml/decomposition/incremental_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from cuml.internals.array import CumlArray
from cuml.internals.base import Base
from cuml.internals.input_utils import input_to_cupy_array
from cuml.internals.validation import check_is_fitted


class IncrementalPCA(PCA):
Expand Down Expand Up @@ -418,6 +419,7 @@ def transform(self, X, *, convert_dtype=False) -> CumlArray:
X_new : array-like, shape (n_samples, n_components)

"""
check_is_fitted(self)

if scipy.sparse.issparse(X) or cupyx.scipy.sparse.issparse(X):
X = _validate_sparse_input(X)
Expand Down
Loading