Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions python/cuml/cuml/internals/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,9 +357,11 @@ def reflect(
provide ``None`` to disable this inference entirely; in this case the
output type is expected to be specified manually either internal or
external to the method.
reset : bool, default=False
Set to True for methods like ``fit`` that reset the reflected type on
an estimator.
reset : bool or "type", default=False
If True, both the features and reflected type are reset on the estimator.
If ``"type"``, only the reflected type is reset on the estimator.
Defaults to False, to not reset anything. Most estimators should set
``reset=True`` on any fit-like methods.
"""
# Local to avoid circular imports
import cuml.accel
Expand Down Expand Up @@ -391,9 +393,12 @@ def reflect(
if array is not None:
array = _get_param(sig, array)

if reset and (model is None or array is None):
if reset not in (True, False, "type"):
raise ValueError(f"reset={reset!r} is not supported")

if (reset is not False) and (model is None or array is None):
raise ValueError(
"`reset=True` is not valid with `array=None` or `model=None`"
f"`reset={reset}` is not valid with `array=None` or `model=None`"
)

@functools.wraps(func)
Expand All @@ -411,8 +416,9 @@ def inner(*args, **kwargs):
array_arg = np.asarray(array_arg)

with enter_internal_context() as was_external:
if reset:
if reset is not False:
model_arg._set_output_type(array_arg)
if reset is True:
check_features(model_arg, array_arg, reset=True)

res = func(*args, **kwargs)
Expand Down
36 changes: 32 additions & 4 deletions python/cuml/cuml/internals/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ def _get_n_features(X):
return len(row)
except Exception:
pass
return 1

if hasattr(X, "shape"):
shape = X.shape
Expand All @@ -82,9 +81,38 @@ def _get_n_features(X):
else:
shape = np.asarray(X).shape

# TODO: Can remove the fallback to 1 when we finish dropping support
# for 1D X inputs
return shape[1] if len(shape) >= 2 else 1
ndim = len(shape)

if ndim != 2:
import cuml.accel

if isinstance(X, (cudf.Series, pd.Series)):
msg = (
f"Expected a 2-dimensional container but got {type(X).__name__} "
"instead. Pass a DataFrame containing a single row (i.e. "
"single sample) or a single column (i.e. single feature) "
"instead."
)
else:
kind = "scalar" if ndim == 0 else f"{ndim}D"
msg = (
f"Expected 2D array, got {kind} array instead. Reshape your data "
"using array.reshape(-1, 1) if your data has a single feature, "
"or array.reshape(1, -1) if it contains a single sample."
)

if cuml.accel.enabled() or ndim > 2:
raise ValueError(msg)
else:
warnings.warn(
"Support for passing non-2-dimensional X was deprecated in 26.04 "
"and will be removed in 26.06. In cuml 26.06 this will error "
f"with the following message:\n\n{msg}",
FutureWarning,
)
Comment thread
jcrist marked this conversation as resolved.
# Fallback to 1 feature until the deprecation is completed
return 1
return shape[1]


def _warn_or_error(exc_cls, msg):
Expand Down
4 changes: 2 additions & 2 deletions python/cuml/cuml/preprocessing/TargetEncoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ class TargetEncoder(Base, InteropMixin):
>>> test = DataFrame({'category': ['a', 'c', 'b', 'a']})

>>> encoder = TargetEncoder(output_type='numpy')
>>> train_encoded = encoder.fit_transform(train.category, train.label)
>>> test_encoded = encoder.transform(test.category)
>>> train_encoded = encoder.fit_transform(train[["category"]], train.label)
>>> test_encoded = encoder.transform(test[["category"]])
>>> print(train_encoded)
[1. 1. 0. 1.]
>>> print(test_encoded)
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/cuml/preprocessing/label.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def __init__(
self.sparse_output = sparse_output
self.classes_ = None

@cuml.internals.reflect(reset=True)
@cuml.internals.reflect(reset="type")
def fit(self, y) -> "LabelBinarizer":
"""
Fit label binarizer
Expand Down

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions python/cuml/tests/test_coordinate_descent.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
#

Expand Down Expand Up @@ -274,7 +274,7 @@ def test_lasso_predict_convert_dtype(train_dtype, test_dtype):

@pytest.mark.parametrize("cls", [cuml.ElasticNet, cuml.Lasso])
def test_set_params(cls):
x = np.linspace(0, 1, 50)
x = np.linspace(0, 1, 50)[:, None]
y = 2 * x

model = cls(alpha=0.01)
Expand Down
4 changes: 2 additions & 2 deletions python/cuml/tests/test_dbscan.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
#

Expand Down Expand Up @@ -497,7 +497,7 @@ def test_dbscan_no_calc_core_point_indices():


def test_dbscan_on_empty_array():
X = np.array([])
X = np.array([[]])
cuml_dbscan = cuDBSCAN()

with pytest.raises(ValueError):
Expand Down
9 changes: 8 additions & 1 deletion python/cuml/tests/test_label_binarizer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0

import cupy as cp
Expand All @@ -11,6 +11,13 @@
from cuml.testing.utils import array_equal


def test_label_binarizer_no_features():
    """Fitting ``LabelBinarizer`` must not record feature metadata.

    The estimator is fit on a flat array of labels, and the fitted model is
    then checked for the absence of an ``n_features_in_`` attribute.
    """
    labels = cp.asarray([1, 2, 1, 2, 1, 0])
    binarizer = LabelBinarizer()
    fitted = binarizer.fit(labels)
    # The feature-tracking attribute must be absent, not merely unset.
    has_feature_count = hasattr(fitted, "n_features_in_")
    assert not has_feature_count


@pytest.mark.parametrize(
"labels",
[
Expand Down
7 changes: 7 additions & 0 deletions python/cuml/tests/test_label_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,13 @@ def _df_to_similarity_mat(df):
return np.pad(arr, [(arr.shape[1] - 1, 0), (0, 0)], "edge")


def test_label_encoder_no_features():
    """Fitting ``LabelEncoder`` must not record feature metadata.

    The estimator is fit on a flat array of labels, and the fitted model is
    then checked for the absence of an ``n_features_in_`` attribute.
    """
    labels = cp.asarray([1, 2, 1, 2, 1, 0])
    encoder = LabelEncoder()
    fitted = encoder.fit(labels)
    # The feature-tracking attribute must be absent, not merely unset.
    has_feature_count = hasattr(fitted, "n_features_in_")
    assert not has_feature_count


@pytest.mark.parametrize("length", [10, 1000])
@pytest.mark.parametrize("cardinality", [5, 10, 50])
def test_labelencoder_fit_transform(length, cardinality):
Expand Down
Loading
Loading