diff --git a/python/cuml/cuml/internals/outputs.py b/python/cuml/cuml/internals/outputs.py index 7d8df836e5..6e6c1a7c53 100644 --- a/python/cuml/cuml/internals/outputs.py +++ b/python/cuml/cuml/internals/outputs.py @@ -357,9 +357,11 @@ def reflect( provide ``None`` to disable this inference entirely; in this case the output type is expected to be specified manually either internal or external to the method. - reset : bool, default=False - Set to True for methods like ``fit`` that reset the reflected type on - an estimator. + reset : bool or "type", default=False + If True, both the features and reflected type are reset on the estimator. + If ``"type"``, only the reflected type is reset on the estimator. + Defaults to False, to not reset anything. Most estimators should set + ``reset=True`` on any fit-like methods. """ # Local to avoid circular imports import cuml.accel @@ -391,9 +393,12 @@ def reflect( if array is not None: array = _get_param(sig, array) - if reset and (model is None or array is None): + if reset not in (True, False, "type"): + raise ValueError(f"reset={reset!r} is not supported") + + if (reset is not False) and (model is None or array is None): raise ValueError( - "`reset=True` is not valid with `array=None` or `model=None`" + f"`reset={reset}` is not valid with `array=None` or `model=None`" ) @functools.wraps(func) @@ -411,8 +416,9 @@ def inner(*args, **kwargs): array_arg = np.asarray(array_arg) with enter_internal_context() as was_external: - if reset: + if reset is not False: model_arg._set_output_type(array_arg) + if reset is True: check_features(model_arg, array_arg, reset=True) res = func(*args, **kwargs) diff --git a/python/cuml/cuml/internals/validation.py b/python/cuml/cuml/internals/validation.py index 1197172e2e..4fa0125ffb 100644 --- a/python/cuml/cuml/internals/validation.py +++ b/python/cuml/cuml/internals/validation.py @@ -71,7 +71,6 @@ def _get_n_features(X): return len(row) except Exception: pass - return 1 if hasattr(X, "shape"): 
shape = X.shape @@ -82,9 +81,38 @@ def _get_n_features(X): else: shape = np.asarray(X).shape - # TODO: Can remove the fallback to 1 when we finish dropping support - # for 1D X inputs - return shape[1] if len(shape) >= 2 else 1 + ndim = len(shape) + + if ndim != 2: + import cuml.accel + + if isinstance(X, (cudf.Series, pd.Series)): + msg = ( + f"Expected a 2-dimensional container but got {type(X).__name__} " + "instead. Pass a DataFrame containing a single row (i.e. " + "single sample) or a single column (i.e. single feature) " + "instead." + ) + else: + kind = "scalar" if ndim == 0 else f"{ndim}D" + msg = ( + f"Expected 2D array, got {kind} array instead. Reshape your data " + "using array.reshape(-1, 1) if your data has a single feature, " + "or array.reshape(1, -1) if it contains a single sample." + ) + + if cuml.accel.enabled() or ndim > 2: + raise ValueError(msg) + else: + warnings.warn( + "Support for passing non-2-dimensional X was deprecated in 26.04 " + "and will be removed in 26.06. In cuml 26.06 this will error " + f"with the following message:\n\n{msg}", + FutureWarning, + ) + # Fallback to 1 feature until the deprecation is completed + return 1 + return shape[1] def _warn_or_error(exc_cls, msg): diff --git a/python/cuml/cuml/preprocessing/TargetEncoder.py b/python/cuml/cuml/preprocessing/TargetEncoder.py index a0824a7243..5b2af8457f 100644 --- a/python/cuml/cuml/preprocessing/TargetEncoder.py +++ b/python/cuml/cuml/preprocessing/TargetEncoder.py @@ -174,8 +174,8 @@ class TargetEncoder(Base, InteropMixin): >>> test = DataFrame({'category': ['a', 'c', 'b', 'a']}) >>> encoder = TargetEncoder(output_type='numpy') - >>> train_encoded = encoder.fit_transform(train.category, train.label) - >>> test_encoded = encoder.transform(test.category) + >>> train_encoded = encoder.fit_transform(train[["category"]], train.label) + >>> test_encoded = encoder.transform(test[["category"]]) >>> print(train_encoded) [1. 1. 0. 1.] 
>>> print(test_encoded) diff --git a/python/cuml/cuml/preprocessing/label.py b/python/cuml/cuml/preprocessing/label.py index e7afa6cea7..590e5fc458 100644 --- a/python/cuml/cuml/preprocessing/label.py +++ b/python/cuml/cuml/preprocessing/label.py @@ -157,7 +157,7 @@ def __init__( self.sparse_output = sparse_output self.classes_ = None - @cuml.internals.reflect(reset=True) + @cuml.internals.reflect(reset="type") def fit(self, y) -> "LabelBinarizer": """ Fit label binarizer diff --git a/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml b/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml index 91b04a85bc..94e2d0ab85 100644 --- a/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml +++ b/python/cuml/cuml_accel_tests/upstream/scikit-learn/xfail-list.yaml @@ -175,8 +175,6 @@ - "sklearn.decomposition.tests.test_pca::test_sparse_pca_solver_error[42-csr_array-randomized]" - "sklearn.decomposition.tests.test_pca::test_sparse_pca_solver_error[42-csr_matrix-full]" - "sklearn.decomposition.tests.test_pca::test_sparse_pca_solver_error[42-csr_matrix-randomized]" - - "sklearn.ensemble.tests.test_forest::test_1d_input[RandomForestClassifier]" - - "sklearn.ensemble.tests.test_forest::test_1d_input[RandomForestRegressor]" - "sklearn.ensemble.tests.test_forest::test_backend_respected" - "sklearn.ensemble.tests.test_forest::test_class_weights[RandomForestClassifier]" - "sklearn.ensemble.tests.test_forest::test_estimators_samples[RandomForestClassifier-False-1]" @@ -441,11 +439,9 @@ - "sklearn.tests.test_common::test_estimators[Pipeline(steps=[('scaler',StandardScaler()),('final_estimator',LogisticRegression())])-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[Pipeline(steps=[('scaler',StandardScaler()),('final_estimator',LogisticRegression())])-check_estimators_nan_inf]" - 
"sklearn.tests.test_common::test_estimators[Pipeline(steps=[('scaler',StandardScaler()),('final_estimator',LogisticRegression())])-check_fit2d_1sample]" - - "sklearn.tests.test_common::test_estimators[Pipeline(steps=[('scaler',StandardScaler()),('final_estimator',LogisticRegression())])-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[Pipeline(steps=[('scaler',StandardScaler()),('final_estimator',LogisticRegression())])-check_supervised_y_2d]" - "sklearn.tests.test_common::test_estimators[Pipeline(steps=[('scaler',StandardScaler()),('final_estimator',LogisticRegression())])-check_supervised_y_no_nan]" - "sklearn.tests.test_common::test_estimators[Pipeline(steps=[('scaler',StandardScaler()),('final_estimator',Ridge())])-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_estimators[Pipeline(steps=[('scaler',StandardScaler()),('final_estimator',Ridge())])-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[Pipeline(steps=[('scaler',StandardScaler()),('final_estimator',Ridge())])-check_regressor_data_not_an_array]" - "sklearn.tests.test_common::test_estimators[Pipeline(steps=[('scaler',StandardScaler()),('final_estimator',Ridge())])-check_supervised_y_no_nan]" - "sklearn.tests.test_common::test_estimators[SGDClassifier()-check_sample_weights_invariance(kind=zeros)]" @@ -460,21 +456,16 @@ - "sklearn.tests.test_common::test_estimators[SpectralCoclustering()-check_estimators_dtypes]" - "sklearn.tests.test_common::test_estimators[SpectralCoclustering()-check_fit2d_1feature]" - "sklearn.tests.test_common::test_estimators[SpectralCoclustering()-check_fit2d_1sample]" - - "sklearn.tests.test_common::test_estimators[SpectralCoclustering()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[SpectralCoclustering()-check_methods_subset_invariance]" - 
"sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_grid={'logisticregression__C':[0.1,1.0]})-check_classifier_data_not_an_array]" - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_grid={'logisticregression__C':[0.1,1.0]})-check_dtype_object]" - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_grid={'logisticregression__C':[0.1,1.0]})-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_grid={'logisticregression__C':[0.1,1.0]})-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_grid={'logisticregression__C':[0.1,1.0]})-check_fit1d]" - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_grid={'logisticregression__C':[0.1,1.0]})-check_fit2d_1feature]" - - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_grid={'logisticregression__C':[0.1,1.0]})-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_grid={'ridge__alpha':[0.1,1.0]})-check_dtype_object]" - 
"sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_grid={'ridge__alpha':[0.1,1.0]})-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_grid={'ridge__alpha':[0.1,1.0]})-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_grid={'ridge__alpha':[0.1,1.0]})-check_fit1d]" - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_grid={'ridge__alpha':[0.1,1.0]})-check_fit2d_1feature]" - - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_grid={'ridge__alpha':[0.1,1.0]})-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_grid={'ridge__alpha':[0.1,1.0]})-check_regressor_data_not_an_array]" - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_grid={'ridge__alpha':[0.1,1.0]})-check_supervised_y_2d]" - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_grid={'ridge__alpha':[0.1,1.0]})-check_supervised_y_no_nan]" @@ -482,12 +473,8 @@ - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,estimator=LogisticRegression(),param_grid={'C':[0.1,1.0]})-check_dtype_object]" - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,estimator=LogisticRegression(),param_grid={'C':[0.1,1.0]})-check_estimators_empty_data_messages]" - 
"sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,estimator=LogisticRegression(),param_grid={'C':[0.1,1.0]})-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,estimator=LogisticRegression(),param_grid={'C':[0.1,1.0]})-check_fit1d]" - - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,estimator=LogisticRegression(),param_grid={'C':[0.1,1.0]})-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,estimator=Ridge(),param_grid={'alpha':[0.1,1.0]})-check_dtype_object]" - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,estimator=Ridge(),param_grid={'alpha':[0.1,1.0]})-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,estimator=Ridge(),param_grid={'alpha':[0.1,1.0]})-check_fit1d]" - - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,estimator=Ridge(),param_grid={'alpha':[0.1,1.0]})-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,estimator=Ridge(),param_grid={'alpha':[0.1,1.0]})-check_regressor_data_not_an_array]" - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,estimator=Ridge(),param_grid={'alpha':[0.1,1.0]})-check_supervised_y_2d]" - "sklearn.tests.test_common::test_search_cv[GridSearchCV(cv=2,estimator=Ridge(),param_grid={'alpha':[0.1,1.0]})-check_supervised_y_no_nan]" @@ -495,19 +482,15 @@ - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),min_resources='smallest',param_grid={'logisticregression__C':[0.1,1.0]},random_state=0)-check_dtype_object]" - 
"sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),min_resources='smallest',param_grid={'logisticregression__C':[0.1,1.0]},random_state=0)-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),min_resources='smallest',param_grid={'logisticregression__C':[0.1,1.0]},random_state=0)-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),min_resources='smallest',param_grid={'logisticregression__C':[0.1,1.0]},random_state=0)-check_fit1d]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),min_resources='smallest',param_grid={'logisticregression__C':[0.1,1.0]},random_state=0)-check_fit2d_1feature]" - - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),min_resources='smallest',param_grid={'logisticregression__C':[0.1,1.0]},random_state=0)-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),min_resources='smallest',param_grid={'ridge__alpha':[0.1,1.0]},random_state=0)-check_dtype_object]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),min_resources='smallest',param_grid={'ridge__alpha':[0.1,1.0]},random_state=0)-check_estimators_empty_data_messages]" - 
"sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),min_resources='smallest',param_grid={'ridge__alpha':[0.1,1.0]},random_state=0)-check_estimators_fit_returns_self(readonly_memmap=True)]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),min_resources='smallest',param_grid={'ridge__alpha':[0.1,1.0]},random_state=0)-check_estimators_fit_returns_self]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),min_resources='smallest',param_grid={'ridge__alpha':[0.1,1.0]},random_state=0)-check_estimators_nan_inf]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),min_resources='smallest',param_grid={'ridge__alpha':[0.1,1.0]},random_state=0)-check_estimators_overwrite_params]" - - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),min_resources='smallest',param_grid={'ridge__alpha':[0.1,1.0]},random_state=0)-check_fit1d]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),min_resources='smallest',param_grid={'ridge__alpha':[0.1,1.0]},random_state=0)-check_fit2d_1feature]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),min_resources='smallest',param_grid={'ridge__alpha':[0.1,1.0]},random_state=0)-check_fit2d_1sample]" - - 
"sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),min_resources='smallest',param_grid={'ridge__alpha':[0.1,1.0]},random_state=0)-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),min_resources='smallest',param_grid={'ridge__alpha':[0.1,1.0]},random_state=0)-check_regressor_data_not_an_array]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),min_resources='smallest',param_grid={'ridge__alpha':[0.1,1.0]},random_state=0)-check_supervised_y_2d]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),min_resources='smallest',param_grid={'ridge__alpha':[0.1,1.0]},random_state=0)-check_supervised_y_no_nan]" @@ -519,20 +502,12 @@ - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=LogisticRegression(),min_resources='smallest',param_grid={'C':[0.1,1.0]},random_state=0)-check_estimators_empty_data_messages1]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=LogisticRegression(),min_resources='smallest',param_grid={'C':[0.1,1.0]},random_state=0)-check_estimators_nan_inf0]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=LogisticRegression(),min_resources='smallest',param_grid={'C':[0.1,1.0]},random_state=0)-check_estimators_nan_inf1]" - - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=LogisticRegression(),min_resources='smallest',param_grid={'C':[0.1,1.0]},random_state=0)-check_fit1d0]" - - 
"sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=LogisticRegression(),min_resources='smallest',param_grid={'C':[0.1,1.0]},random_state=0)-check_fit1d1]" - - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=LogisticRegression(),min_resources='smallest',param_grid={'C':[0.1,1.0]},random_state=0)-check_fit2d_predict1d0]" - - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=LogisticRegression(),min_resources='smallest',param_grid={'C':[0.1,1.0]},random_state=0)-check_fit2d_predict1d1]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=Ridge(),min_resources='smallest',param_grid={'alpha':[0.1,1.0]},random_state=0)-check_dtype_object0]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=Ridge(),min_resources='smallest',param_grid={'alpha':[0.1,1.0]},random_state=0)-check_dtype_object1]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=Ridge(),min_resources='smallest',param_grid={'alpha':[0.1,1.0]},random_state=0)-check_estimators_nan_inf0]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=Ridge(),min_resources='smallest',param_grid={'alpha':[0.1,1.0]},random_state=0)-check_estimators_nan_inf1]" - - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=Ridge(),min_resources='smallest',param_grid={'alpha':[0.1,1.0]},random_state=0)-check_fit1d0]" - - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=Ridge(),min_resources='smallest',param_grid={'alpha':[0.1,1.0]},random_state=0)-check_fit1d1]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=Ridge(),min_resources='smallest',param_grid={'alpha':[0.1,1.0]},random_state=0)-check_fit2d_1sample0]" - 
"sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=Ridge(),min_resources='smallest',param_grid={'alpha':[0.1,1.0]},random_state=0)-check_fit2d_1sample1]" - - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=Ridge(),min_resources='smallest',param_grid={'alpha':[0.1,1.0]},random_state=0)-check_fit2d_predict1d0]" - - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=Ridge(),min_resources='smallest',param_grid={'alpha':[0.1,1.0]},random_state=0)-check_fit2d_predict1d1]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=Ridge(),min_resources='smallest',param_grid={'alpha':[0.1,1.0]},random_state=0)-check_regressor_data_not_an_array0]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=Ridge(),min_resources='smallest',param_grid={'alpha':[0.1,1.0]},random_state=0)-check_regressor_data_not_an_array1]" - "sklearn.tests.test_common::test_search_cv[HalvingGridSearchCV(cv=2,estimator=Ridge(),min_resources='smallest',param_grid={'alpha':[0.1,1.0]},random_state=0)-check_supervised_y_2d0]" @@ -543,20 +518,16 @@ - "sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_distributions={'logisticregression__C':[0.1,1.0]},random_state=0)-check_dtype_object]" - "sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_distributions={'logisticregression__C':[0.1,1.0]},random_state=0)-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_distributions={'logisticregression__C':[0.1,1.0]},random_state=0)-check_estimators_nan_inf]" - - 
"sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_distributions={'logisticregression__C':[0.1,1.0]},random_state=0)-check_fit1d]" - "sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_distributions={'logisticregression__C':[0.1,1.0]},random_state=0)-check_fit2d_1feature]" - "sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_distributions={'logisticregression__C':[0.1,1.0]},random_state=0)-check_fit2d_1sample]" - - "sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_distributions={'logisticregression__C':[0.1,1.0]},random_state=0)-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_dtype_object]" - "sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_estimators_fit_returns_self(readonly_memmap=True)]" - 
"sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_estimators_fit_returns_self]" - "sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_estimators_nan_inf]" - "sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_estimators_overwrite_params]" - - "sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_fit1d]" - "sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_fit2d_1feature]" - "sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_fit2d_1sample]" - - "sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_regressor_data_not_an_array]" - 
"sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_supervised_y_2d]" - "sklearn.tests.test_common::test_search_cv[HalvingRandomSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_supervised_y_no_nan]" @@ -564,16 +535,12 @@ - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_distributions={'logisticregression__C':[0.1,1.0]},random_state=0)-check_dtype_object]" - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_distributions={'logisticregression__C':[0.1,1.0]},random_state=0)-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_distributions={'logisticregression__C':[0.1,1.0]},random_state=0)-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_distributions={'logisticregression__C':[0.1,1.0]},random_state=0)-check_fit1d]" - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_distributions={'logisticregression__C':[0.1,1.0]},random_state=0)-check_fit2d_1feature]" - - 
"sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_distributions={'logisticregression__C':[0.1,1.0]},random_state=0)-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('logisticregression',LogisticRegression())]),param_distributions={'logisticregression__C':[0.1,1.0]},random_state=0)-check_supervised_y_2d]" - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_dtype_object]" - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_fit1d]" - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_fit2d_1feature]" - - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_fit2d_predict1d]" - 
"sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_regressor_data_not_an_array]" - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_supervised_y_2d]" - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,error_score='raise',estimator=Pipeline(steps=[('pca',PCA()),('ridge',Ridge())]),param_distributions={'ridge__alpha':[0.1,1.0]},random_state=0)-check_supervised_y_no_nan]" @@ -581,12 +548,8 @@ - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,estimator=LogisticRegression(),param_distributions={'C':[0.1,1.0]},random_state=0)-check_dtype_object]" - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,estimator=LogisticRegression(),param_distributions={'C':[0.1,1.0]},random_state=0)-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,estimator=LogisticRegression(),param_distributions={'C':[0.1,1.0]},random_state=0)-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,estimator=LogisticRegression(),param_distributions={'C':[0.1,1.0]},random_state=0)-check_fit1d]" - - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,estimator=LogisticRegression(),param_distributions={'C':[0.1,1.0]},random_state=0)-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,estimator=Ridge(),param_distributions={'alpha':[0.1,1.0]},random_state=0)-check_dtype_object]" - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,estimator=Ridge(),param_distributions={'alpha':[0.1,1.0]},random_state=0)-check_estimators_nan_inf]" - - 
"sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,estimator=Ridge(),param_distributions={'alpha':[0.1,1.0]},random_state=0)-check_fit1d]" - - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,estimator=Ridge(),param_distributions={'alpha':[0.1,1.0]},random_state=0)-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,estimator=Ridge(),param_distributions={'alpha':[0.1,1.0]},random_state=0)-check_regressor_data_not_an_array]" - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,estimator=Ridge(),param_distributions={'alpha':[0.1,1.0]},random_state=0)-check_supervised_y_2d]" - "sklearn.tests.test_common::test_search_cv[RandomizedSearchCV(cv=2,estimator=Ridge(),param_distributions={'alpha':[0.1,1.0]},random_state=0)-check_supervised_y_no_nan]" @@ -745,9 +708,7 @@ - "sklearn.tests.test_common::test_estimators[LogisticRegression()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[LogisticRegression()-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_estimators[LogisticRegression()-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_estimators[LogisticRegression()-check_fit1d]" - "sklearn.tests.test_common::test_estimators[LogisticRegression()-check_fit2d_1sample]" - - "sklearn.tests.test_common::test_estimators[LogisticRegression()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[LogisticRegression()-check_requires_y_none]" - "sklearn.tests.test_common::test_estimators[LogisticRegression()-check_sample_weights_not_an_array]" - "sklearn.tests.test_common::test_estimators[LogisticRegression()-check_sparsify_coefficients]" @@ -905,8 +866,6 @@ - "sklearn.tests.test_common::test_estimators[KernelRidge()-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_estimators[KernelRidge()-check_estimators_nan_inf]" - 
"sklearn.tests.test_common::test_estimators[KernelRidge()-check_estimators_pickle(readonly_memmap=True)]" - - "sklearn.tests.test_common::test_estimators[KernelRidge()-check_fit1d]" - - "sklearn.tests.test_common::test_estimators[KernelRidge()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[KernelRidge()-check_regressor_data_not_an_array]" - "sklearn.tests.test_common::test_estimators[KernelRidge()-check_regressors_train(readonly_memmap=True)]" - "sklearn.tests.test_common::test_estimators[KernelRidge()-check_regressors_train(readonly_memmap=True,X_dtype=float32)]" @@ -1091,7 +1050,6 @@ tests: - "sklearn.tests.test_common::test_estimators[SVR()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[SVR()-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_estimators[SVR()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[SVR()-check_regressor_data_not_an_array]" - "sklearn.tests.test_common::test_estimators[SVR()-check_requires_y_none]" - "sklearn.tests.test_common::test_estimators[SVR()-check_sample_weights_not_an_array]" @@ -1113,12 +1071,9 @@ - "sklearn.tests.test_common::test_estimators[DBSCAN()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[DBSCAN()-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_estimators[DBSCAN()-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_estimators[DBSCAN()-check_fit1d]" - "sklearn.tests.test_common::test_estimators[DBSCAN()-check_sample_weights_not_an_array]" - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_fit1d]" - - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_regressor_data_not_an_array]" - 
"sklearn.tests.test_common::test_estimators[ElasticNet()-check_regressor_multioutput]" - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_requires_y_none]" @@ -1126,32 +1081,25 @@ - "sklearn.tests.test_common::test_estimators[ElasticNet()-check_supervised_y_no_nan]" - "sklearn.tests.test_common::test_estimators[KMeans()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[KMeans()-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_estimators[KMeans()-check_fit1d]" - - "sklearn.tests.test_common::test_estimators[KMeans()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[KMeans()-check_sample_weights_not_an_array]" - "sklearn.tests.test_common::test_estimators[KMeans()-check_transformer_data_not_an_array]" - "sklearn.tests.test_common::test_estimators[KNeighborsClassifier()-check_classifier_data_not_an_array]" - "sklearn.tests.test_common::test_estimators[KNeighborsClassifier()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[KNeighborsClassifier()-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_estimators[KNeighborsClassifier()-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_estimators[KNeighborsClassifier()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[KNeighborsClassifier()-check_requires_y_none]" - "sklearn.tests.test_common::test_estimators[KNeighborsClassifier()-check_supervised_y_no_nan]" - "sklearn.tests.test_common::test_estimators[KNeighborsRegressor()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[KNeighborsRegressor()-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_estimators[KNeighborsRegressor()-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_estimators[KNeighborsRegressor()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[KNeighborsRegressor()-check_regressor_data_not_an_array]" - 
"sklearn.tests.test_common::test_estimators[KNeighborsRegressor()-check_requires_y_none]" - "sklearn.tests.test_common::test_estimators[KNeighborsRegressor()-check_supervised_y_no_nan]" - "sklearn.tests.test_common::test_estimators[KernelDensity()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[KernelDensity()-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_estimators[KernelDensity()-check_fit1d]" - "sklearn.tests.test_common::test_estimators[KernelDensity()-check_sample_weights_not_an_array]" - "sklearn.tests.test_common::test_estimators[Lasso()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[Lasso()-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_estimators[Lasso()-check_fit1d]" - - "sklearn.tests.test_common::test_estimators[Lasso()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[Lasso()-check_regressor_data_not_an_array]" - "sklearn.tests.test_common::test_estimators[Lasso()-check_regressor_multioutput]" - "sklearn.tests.test_common::test_estimators[Lasso()-check_requires_y_none]" @@ -1163,9 +1111,7 @@ - "sklearn.tests.test_common::test_estimators[LinearRegression()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[LinearRegression()-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_estimators[LinearRegression()-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_estimators[LinearRegression()-check_fit1d]" - "sklearn.tests.test_common::test_estimators[LinearRegression()-check_fit2d_1sample]" - - "sklearn.tests.test_common::test_estimators[LinearRegression()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[LinearRegression()-check_regressor_data_not_an_array]" - "sklearn.tests.test_common::test_estimators[LinearRegression()-check_requires_y_none]" - "sklearn.tests.test_common::test_estimators[LinearRegression()-check_sample_weights_not_an_array]" @@ -1176,10 +1122,8 @@ - 
"sklearn.tests.test_common::test_estimators[PCA()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[PCA()-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_estimators[PCA()-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_estimators[PCA()-check_fit1d]" - "sklearn.tests.test_common::test_estimators[PCA()-check_fit2d_1feature]" - "sklearn.tests.test_common::test_estimators[PCA()-check_fit2d_1sample]" - - "sklearn.tests.test_common::test_estimators[PCA()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[PCA()-check_transformer_data_not_an_array]" - "sklearn.tests.test_common::test_estimators[RandomForestClassifier()-check_classifier_data_not_an_array]" - "sklearn.tests.test_common::test_estimators[RandomForestClassifier()-check_classifiers_multilabel_output_format_decision_function]" @@ -1187,36 +1131,27 @@ - "sklearn.tests.test_common::test_estimators[RandomForestClassifier()-check_classifiers_train]" - "sklearn.tests.test_common::test_estimators[RandomForestClassifier()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[RandomForestClassifier()-check_estimators_empty_data_messages]" - - "sklearn.tests.test_common::test_estimators[RandomForestClassifier()-check_fit1d]" - - "sklearn.tests.test_common::test_estimators[RandomForestClassifier()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[RandomForestClassifier()-check_supervised_y_no_nan]" - "sklearn.tests.test_common::test_estimators[RandomForestRegressor()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[RandomForestRegressor()-check_estimators_empty_data_messages]" - - "sklearn.tests.test_common::test_estimators[RandomForestRegressor()-check_fit1d]" - - "sklearn.tests.test_common::test_estimators[RandomForestRegressor()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[RandomForestRegressor()-check_regressor_data_not_an_array]" - 
"sklearn.tests.test_common::test_estimators[RandomForestRegressor()-check_supervised_y_no_nan]" - "sklearn.tests.test_common::test_estimators[Ridge()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[Ridge()-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_estimators[Ridge()-check_fit1d]" - - "sklearn.tests.test_common::test_estimators[Ridge()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[Ridge()-check_regressor_data_not_an_array]" - "sklearn.tests.test_common::test_estimators[Ridge()-check_requires_y_none]" - "sklearn.tests.test_common::test_estimators[Ridge()-check_sample_weights_not_an_array]" - "sklearn.tests.test_common::test_estimators[Ridge()-check_supervised_y_no_nan]" - "sklearn.tests.test_common::test_estimators[SpectralEmbedding()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[SpectralEmbedding()-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_estimators[StandardScaler()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[TSNE()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[TSNE()-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_estimators[TSNE()-check_estimators_nan_inf]" - "sklearn.tests.test_common::test_estimators[TruncatedSVD()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[TruncatedSVD()-check_estimators_empty_data_messages]" - "sklearn.tests.test_common::test_estimators[TruncatedSVD()-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_estimators[TruncatedSVD()-check_fit1d]" - "sklearn.tests.test_common::test_estimators[TruncatedSVD()-check_fit2d_1feature]" - "sklearn.tests.test_common::test_estimators[TruncatedSVD()-check_fit2d_1sample]" - - "sklearn.tests.test_common::test_estimators[TruncatedSVD()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[TruncatedSVD()-check_transformer_data_not_an_array]" - reason: test_estimators checks fail 
marker: cuml_accel_test_estimators @@ -1241,8 +1176,8 @@ condition: scikit-learn<1.6 tests: - "sklearn.tests.test_common::test_estimators[TSNE()-check_pipeline_consistency]" -- reason: TSNE does validation on __init__, which causes these tests to fail only on certain sklearn versions - marker: cuml_accel_tsne_validation_on_init +- reason: TSNE currently errors on small n_samples + marker: cuml_accel_tsne_validations condition: scikit-learn>=1.6 tests: - "sklearn.tests.test_common::test_estimators[TSNE()-check_dont_overwrite_parameters]" @@ -1400,8 +1335,6 @@ - "sklearn.tests.test_common::test_estimators[LinearSVC()-check_classifier_data_not_an_array]" - "sklearn.tests.test_common::test_estimators[LinearSVC()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[LinearSVC()-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_estimators[LinearSVC()-check_fit1d]" - - "sklearn.tests.test_common::test_estimators[LinearSVC()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[LinearSVC()-check_requires_y_none]" - "sklearn.tests.test_common::test_estimators[LinearSVC()-check_sample_weights_not_an_array]" - "sklearn.tests.test_common::test_estimators[LinearSVC()-check_sparsify_coefficients]" @@ -1409,8 +1342,6 @@ - "sklearn.tests.test_common::test_estimators[LinearSVC()-check_supervised_y_no_nan]" - "sklearn.tests.test_common::test_estimators[LinearSVR()-check_dtype_object]" - "sklearn.tests.test_common::test_estimators[LinearSVR()-check_estimators_nan_inf]" - - "sklearn.tests.test_common::test_estimators[LinearSVR()-check_fit1d]" - - "sklearn.tests.test_common::test_estimators[LinearSVR()-check_fit2d_predict1d]" - "sklearn.tests.test_common::test_estimators[LinearSVR()-check_regressor_data_not_an_array]" - "sklearn.tests.test_common::test_estimators[LinearSVR()-check_requires_y_none]" - "sklearn.tests.test_common::test_estimators[LinearSVR()-check_sample_weights_not_an_array]" diff --git 
a/python/cuml/tests/test_coordinate_descent.py b/python/cuml/tests/test_coordinate_descent.py index b180042d81..6588d5f419 100644 --- a/python/cuml/tests/test_coordinate_descent.py +++ b/python/cuml/tests/test_coordinate_descent.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # @@ -274,7 +274,7 @@ def test_lasso_predict_convert_dtype(train_dtype, test_dtype): @pytest.mark.parametrize("cls", [cuml.ElasticNet, cuml.Lasso]) def test_set_params(cls): - x = np.linspace(0, 1, 50) + x = np.linspace(0, 1, 50)[:, None] y = 2 * x model = cls(alpha=0.01) diff --git a/python/cuml/tests/test_dbscan.py b/python/cuml/tests/test_dbscan.py index 33baebe513..c5d1f3f8b8 100644 --- a/python/cuml/tests/test_dbscan.py +++ b/python/cuml/tests/test_dbscan.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # @@ -497,7 +497,7 @@ def test_dbscan_no_calc_core_point_indices(): def test_dbscan_on_empty_array(): - X = np.array([]) + X = np.array([[]]) cuml_dbscan = cuDBSCAN() with pytest.raises(ValueError): diff --git a/python/cuml/tests/test_label_binarizer.py b/python/cuml/tests/test_label_binarizer.py index 640fdcef9d..78f745823f 100644 --- a/python/cuml/tests/test_label_binarizer.py +++ b/python/cuml/tests/test_label_binarizer.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION. 
# SPDX-License-Identifier: Apache-2.0 import cupy as cp @@ -11,6 +11,13 @@ from cuml.testing.utils import array_equal +def test_label_binarizer_no_features(): + """Ensure the features infra is never applied to LabelBinarizer""" + y = cp.asarray([1, 2, 1, 2, 1, 0]) + model = LabelBinarizer().fit(y) + assert not hasattr(model, "n_features_in_") + + @pytest.mark.parametrize( "labels", [ diff --git a/python/cuml/tests/test_label_encoder.py b/python/cuml/tests/test_label_encoder.py index 222d0c0afc..50913a34e0 100644 --- a/python/cuml/tests/test_label_encoder.py +++ b/python/cuml/tests/test_label_encoder.py @@ -20,6 +20,13 @@ def _df_to_similarity_mat(df): return np.pad(arr, [(arr.shape[1] - 1, 0), (0, 0)], "edge") +def test_label_encoder_no_features(): + """Ensure the features infra is never applied to LabelEncoder""" + y = cp.asarray([1, 2, 1, 2, 1, 0]) + model = LabelEncoder().fit(y) + assert not hasattr(model, "n_features_in_") + + @pytest.mark.parametrize("length", [10, 1000]) @pytest.mark.parametrize("cardinality", [5, 10, 50]) def test_labelencoder_fit_transform(length, cardinality): diff --git a/python/cuml/tests/test_target_encoder.py b/python/cuml/tests/test_target_encoder.py index ac5450d267..ab1aba1489 100644 --- a/python/cuml/tests/test_target_encoder.py +++ b/python/cuml/tests/test_target_encoder.py @@ -16,36 +16,57 @@ ) -def test_targetencoder_fit_transform(): - train = cudf.DataFrame( +def test_targetencoder_deprecated_1d_input(): + df = cudf.DataFrame( {"category": ["a", "b", "b", "a"], "label": [1, 0, 1, 1]} ) + + # Warns in fit_transform encoder = TargetEncoder() - train_encoded = encoder.fit_transform(train.category, train.label) + with pytest.warns(FutureWarning, match="non-2-dimensional X"): + encoded = encoder.fit_transform(df.category, df.label) + answer = np.array([1.0, 1.0, 0.0, 1.0]) + assert array_equal(encoded, answer) + + # Warns in fit + encoder = TargetEncoder() + with pytest.warns(FutureWarning, match="non-2-dimensional X"): + 
encoder.fit(df.category, df.label) + + # Warns in transform + with pytest.warns(FutureWarning, match="non-2-dimensional X"): + encoded = encoder.transform(df.category) + assert array_equal(encoded, answer) + + +def test_targetencoder_fit_transform(): + train = cudf.DataFrame({"category": ["a", "b", "b", "a"]}) + label = cudf.Series([1, 0, 1, 1]) + encoder = TargetEncoder() + train_encoded = encoder.fit_transform(train, label) answer = np.array([1.0, 1.0, 0.0, 1.0]) assert array_equal(train_encoded, answer) encoder = TargetEncoder() - encoder.fit(train.category, train.label) - train_encoded = encoder.transform(train.category) + encoder.fit(train, label) + train_encoded = encoder.transform(train) assert array_equal(train_encoded, answer) def test_targetencoder_transform(): - train = cudf.DataFrame( - {"category": ["a", "b", "b", "a"], "label": [1, 0, 1, 1]} - ) + train = cudf.DataFrame({"category": ["a", "b", "b", "a"]}) + label = cudf.Series([1, 0, 1, 1]) test = cudf.DataFrame({"category": ["b", "b", "a", "b"]}) encoder = TargetEncoder() - encoder.fit_transform(train.category, train.label) - test_encoded = encoder.transform(test.category) + encoder.fit_transform(train, label) + test_encoded = encoder.transform(test) answer = np.array([0.5, 0.5, 1.0, 0.5]) assert array_equal(test_encoded, answer) encoder = TargetEncoder() - encoder.fit(train.category, train.label) - test_encoded = encoder.transform(test.category) + encoder.fit(train, label) + test_encoded = encoder.transform(test) assert array_equal(test_encoded, answer) @@ -58,8 +79,8 @@ def test_targetencoder_random(n_samples, dtype, stat): xt = cp.random.randint(0, 1000, n_samples).astype(dtype) encoder = TargetEncoder(stat=stat) - encoder.fit_transform(x, y) - test_encoded = encoder.transform(xt) + encoder.fit_transform(x[:, None], y) + test_encoded = encoder.transform(xt[:, None]) df_train = cudf.DataFrame({"x": x, "y": y}) dg = df_train.groupby("x", as_index=False).agg({"y": stat}) @@ -108,34 +129,32 @@ def 
test_targetencoder_newly_encountered(): Note that there are newly-encountered values in test, namely, 'c' and 'd'. """ - train = cudf.DataFrame( - {"category": ["a", "b", "b", "a"], "label": [1, 0, 1, 1]} - ) + train = cudf.DataFrame({"category": ["a", "b", "b", "a"]}) + label = cudf.Series([1, 0, 1, 1]) test = cudf.DataFrame({"category": ["c", "b", "a", "d"]}) encoder = TargetEncoder() - encoder.fit_transform(train.category, train.label) - test_encoded = encoder.transform(test.category) + encoder.fit_transform(train, label) + test_encoded = encoder.transform(test) answer = np.array([0.75, 0.5, 1.0, 0.75]) assert array_equal(test_encoded, answer) encoder = TargetEncoder() - encoder.fit(train.category, train.label) - test_encoded = encoder.transform(test.category) + encoder.fit(train, label) + test_encoded = encoder.transform(test) assert array_equal(test_encoded, answer) def test_one_category(): - train = cudf.DataFrame( - {"category": ["a", "a", "a", "a"], "label": [3, 0, 0, 3]} - ) + train = cudf.DataFrame({"category": ["a", "a", "a", "a"]}) + label = cudf.Series([3, 0, 0, 3]) test = cudf.DataFrame({"category": ["c", "b", "a", "d"]}) encoder = TargetEncoder() - train_encoded = encoder.fit_transform(train.category, train.label) + train_encoded = encoder.fit_transform(train, label) answer = np.array([1.0, 2.0, 2.0, 1.0]) assert array_equal(train_encoded, answer) - test_encoded = encoder.transform(test.category) + test_encoded = encoder.transform(test) answer = np.array([1.5, 1.5, 1.5, 1.5]) assert array_equal(test_encoded, answer) @@ -145,16 +164,14 @@ def test_targetencoder_pandas(): Note that there are newly-encountered values in test, namely, 'c' and 'd'. 
""" - train = pandas.DataFrame( - {"category": ["a", "b", "b", "a"], "label": [1, 0, 1, 1]} - ) + train = pandas.DataFrame({"category": ["a", "b", "b", "a"]}) + label = pandas.Series([1, 0, 1, 1]) test = pandas.DataFrame({"category": ["c", "b", "a", "d"]}) encoder = TargetEncoder() - encoder.fit_transform(train.category, train.label) - test_encoded = encoder.transform(test.category) + encoder.fit_transform(train, label) + test_encoded = encoder.transform(test) answer = np.array([0.75, 0.5, 1.0, 0.75]) assert array_equal(test_encoded, answer) - print(type(test_encoded)) assert isinstance(test_encoded, pandas.Series) @@ -163,15 +180,14 @@ def test_targetencoder_numpy(): Note that there are newly-encountered values in x_test, namely, 3 and 4. """ - x_train = np.array([1, 2, 2, 1]) + x_train = np.array([1, 2, 2, 1])[:, None] y_train = np.array([1, 0, 1, 1]) - x_test = np.array([1, 2, 3, 4]) + x_test = np.array([1, 2, 3, 4])[:, None] encoder = TargetEncoder() encoder.fit_transform(x_train, y_train) test_encoded = encoder.transform(x_test) answer = np.array([1.0, 0.5, 0.75, 0.75]) assert array_equal(test_encoded, answer) - print(type(test_encoded)) assert isinstance(test_encoded, np.ndarray) @@ -180,22 +196,20 @@ def test_targetencoder_cupy(): Note that there are newly-encountered values in x_test, namely, 3 and 4. 
""" - x_train = cp.array([1, 2, 2, 1]) + x_train = cp.array([1, 2, 2, 1])[:, None] y_train = cp.array([1, 0, 1, 1]) - x_test = cp.array([1, 2, 3, 4]) + x_test = cp.array([1, 2, 3, 4])[:, None] encoder = TargetEncoder() encoder.fit_transform(x_train, y_train) test_encoded = encoder.transform(x_test) answer = np.array([1.0, 0.5, 0.75, 0.75]) assert array_equal(test_encoded, answer) - print(type(test_encoded)) assert isinstance(test_encoded, cp.ndarray) def test_targetencoder_smooth(): - train = cudf.DataFrame( - {"category": ["a", "b", "b", "a"], "label": [1, 0, 1, 1]} - ) + train = cudf.DataFrame({"category": ["a", "b", "b", "a"]}) + label = cudf.Series([1, 0, 1, 1]) answers = np.array( [ [1.0, 1.0, 0.0, 1.0], @@ -207,12 +221,12 @@ def test_targetencoder_smooth(): smooths = [0, 1, 2, 10000] for smooth, answer in zip(smooths, answers): encoder = TargetEncoder(smooth=smooth) - train_encoded = encoder.fit_transform(train.category, train.label) + train_encoded = encoder.fit_transform(train, label) assert array_equal(train_encoded, answer) encoder = TargetEncoder(smooth=smooth) - encoder.fit(train.category, train.label) - train_encoded = encoder.transform(train.category) + encoder.fit(train, label) + train_encoded = encoder.transform(train) assert array_equal(train_encoded, answer) @@ -224,36 +238,32 @@ def test_targetencoder_customized_fold_id(): the 2nd and 3rd sample belongs to `fold 1` and the 4th sample belongs to `fold 2` """ - train = cudf.DataFrame( - {"category": ["a", "b", "b", "a"], "label": [1, 0, 1, 1]} - ) + train = cudf.DataFrame({"category": ["a", "b", "b", "a"]}) + label = cudf.Series([1, 0, 1, 1]) fold_ids = [0, 1, 1, 2] encoder = TargetEncoder(split_method="customize") - train_encoded = encoder.fit_transform( - train.category, train.label, fold_ids=fold_ids - ) + train_encoded = encoder.fit_transform(train, label, fold_ids=fold_ids) answer = np.array([1.0, 0.75, 0.75, 1.0]) assert array_equal(train_encoded, answer) encoder = 
TargetEncoder(split_method="customize") - encoder.fit(train.category, train.label, fold_ids=fold_ids) - train_encoded = encoder.transform(train.category) + encoder.fit(train, label, fold_ids=fold_ids) + train_encoded = encoder.transform(train) assert array_equal(train_encoded, answer) def test_targetencoder_var(): - train = cudf.DataFrame( - {"category": ["a", "b", "b", "b"], "label": [1, 0, 1, 1]} - ) + train = cudf.DataFrame({"category": ["a", "b", "b", "b"]}) + label = cudf.Series([1, 0, 1, 1]) encoder = TargetEncoder(stat="var") - train_encoded = encoder.fit_transform(train.category, train.label) + train_encoded = encoder.fit_transform(train, label) answer = np.array([0.25, 0.0, 0.5, 0.5]) assert array_equal(train_encoded, answer) encoder = TargetEncoder(stat="var") - encoder.fit(train.category, train.label) - train_encoded = encoder.transform(train.category) + encoder.fit(train, label) + train_encoded = encoder.transform(train) assert array_equal(train_encoded, answer) @@ -289,18 +299,16 @@ def test_get_params(): def test_targetencoder_median(): train = cudf.DataFrame( - { - "category": ["a", "a", "a", "a", "b", "b", "b", "b"], - "label": [1, 22, 15, 17, 70, 9, 99, 56], - } + {"category": ["a", "a", "a", "a", "b", "b", "b", "b"]} ) + label = cudf.Series([1, 22, 15, 17, 70, 9, 99, 56]) encoder = TargetEncoder(stat="median") - train_encoded = encoder.fit_transform(train.category, train.label) + train_encoded = encoder.fit_transform(train, label) answer = np.array([17.0, 15.0, 17.0, 15.0, 56.0, 70.0, 56.0, 70.0]) assert array_equal(train_encoded, answer) encoder = TargetEncoder(stat="median") - encoder.fit(train.category, train.label) - train_encoded = encoder.transform(train.category) + encoder.fit(train, label) + train_encoded = encoder.transform(train) assert array_equal(train_encoded, answer) diff --git a/python/cuml/tests/test_tsne.py b/python/cuml/tests/test_tsne.py index 2dd7a4ae54..9f247d7c1d 100644 --- a/python/cuml/tests/test_tsne.py +++ 
b/python/cuml/tests/test_tsne.py @@ -230,7 +230,7 @@ def test_tsne_large(nrows, ncols, method): def test_components_exception(): with pytest.raises(ValueError): - TSNE(n_components=3).fit(np.array([])) + TSNE(n_components=3).fit(np.array([[]])) @pytest.mark.parametrize("input_type", ["cupy", "scipy"]) diff --git a/python/cuml/tests/test_validation.py b/python/cuml/tests/test_validation.py index 34837a6cf1..ee4481cb7d 100644 --- a/python/cuml/tests/test_validation.py +++ b/python/cuml/tests/test_validation.py @@ -78,10 +78,28 @@ def __cuda_array_interface__(self): assert _get_n_features([[1, 2, 3], [3, 4, 5]]) == 3 assert _get_n_features([np.array([1, 2])]) == 2 assert _get_n_features([]) == 0 - assert _get_n_features([1, 2, 3]) == 1 - assert _get_n_features(["a", "b", "c"]) == 1 - assert _get_n_features([b"a", b"b", b"c"]) == 1 - assert _get_n_features([{"a": 1, "b": 2}, {"c": 3}]) == 1 + + with pytest.raises(ValueError, match="Expected 2D array, got 3D array"): + _get_n_features(np.ones((3, 2, 1))) + + +@pytest.mark.parametrize( + "X", + [ + [1, 2, 3], + ["a", "b", "c"], + [b"a", b"b", b"c"], + [{"a": 1, "b": 2}, {"c": 3}], + np.array([1, 2, 3]), + cp.array([1, 2, 3]), + pd.Series([1, 2, 3]), + cudf.Series([1, 2, 3]), + ], +) +def test_get_n_features_1D(X): + with pytest.warns(FutureWarning, match="non-2-dimensional"): + n_features = _get_n_features(X) + assert n_features == 1 def test_get_feature_names():