diff --git a/python/cuml/cuml/accel/tests/scikit-learn/xfail-list.yaml b/python/cuml/cuml/accel/tests/scikit-learn/xfail-list.yaml index c423e8bf1c..9441616728 100644 --- a/python/cuml/cuml/accel/tests/scikit-learn/xfail-list.yaml +++ b/python/cuml/cuml/accel/tests/scikit-learn/xfail-list.yaml @@ -475,6 +475,7 @@ - "sklearn.ensemble.tests.test_forest::test_min_weight_fraction_leaf[RandomForestRegressor]" - "sklearn.ensemble.tests.test_forest::test_missing_value_is_predictive[RandomForestClassifier]" - "sklearn.ensemble.tests.test_forest::test_missing_value_is_predictive[RandomForestRegressor]" + - "sklearn.ensemble.tests.test_forest::test_missing_values_is_resilient[make_regression-RandomForestRegressor]" - "sklearn.ensemble.tests.test_forest::test_mse_criterion_object_segfault_smoke_test[RandomForestRegressor]" - "sklearn.ensemble.tests.test_forest::test_multioutput[RandomForestClassifier]" - "sklearn.ensemble.tests.test_forest::test_multioutput[RandomForestRegressor]" @@ -484,7 +485,6 @@ - "sklearn.ensemble.tests.test_forest::test_poisson_y_positive_check" - "sklearn.ensemble.tests.test_forest::test_probability[RandomForestClassifier]" - "sklearn.ensemble.tests.test_forest::test_regression_criterion[friedman_mse-RandomForestRegressor]" - - "sklearn.ensemble.tests.test_forest::test_regression_criterion[squared_error-RandomForestRegressor]" - "sklearn.ensemble.tests.test_forest::test_sparse_input[coo_array-RandomForestClassifier]" - "sklearn.ensemble.tests.test_forest::test_sparse_input[coo_array-RandomForestRegressor]" - "sklearn.ensemble.tests.test_forest::test_sparse_input[coo_matrix-RandomForestClassifier]" diff --git a/python/cuml/cuml/ensemble/randomforestregressor.pyx b/python/cuml/cuml/ensemble/randomforestregressor.pyx index de0eaad81a..0eb5eedf16 100644 --- a/python/cuml/cuml/ensemble/randomforestregressor.pyx +++ b/python/cuml/cuml/ensemble/randomforestregressor.pyx @@ -266,6 +266,7 @@ class RandomForestRegressor(BaseRandomForestModel, @device_interop_preparation def __init__(self, *, split_criterion=2, + max_features=1.0, accuracy_metric='r2', handle=None, verbose=False, @@ -274,6 +275,7 @@ class RandomForestRegressor(BaseRandomForestModel, self.RF_type = REGRESSION super().__init__( split_criterion=split_criterion, + max_features=max_features, accuracy_metric=accuracy_metric, handle=handle, verbose=verbose, diff --git a/python/cuml/cuml/tests/explainer/test_explainer_kernel_shap.py b/python/cuml/cuml/tests/explainer/test_explainer_kernel_shap.py index 89ec817e53..eff71ba822 100644 --- a/python/cuml/cuml/tests/explainer/test_explainer_kernel_shap.py +++ b/python/cuml/cuml/tests/explainer/test_explainer_kernel_shap.py @@ -219,7 +219,9 @@ def test_kernel_housing_dataset(housing_dataset): y_train = y_train.astype(np.float32) y_test = y_test.astype(np.float32) - cumodel = cuml.RandomForestRegressor().fit(X_train, y_train) + cumodel = cuml.RandomForestRegressor(max_features="sqrt").fit( + X_train, y_train + ) explainer = KernelExplainer( model=cumodel.predict, data=X_train[:100], output_type="numpy" diff --git a/python/cuml/cuml/tests/test_random_forest.py b/python/cuml/cuml/tests/test_random_forest.py index 766c65bca7..2c940cf224 100644 --- a/python/cuml/cuml/tests/test_random_forest.py +++ b/python/cuml/cuml/tests/test_random_forest.py @@ -199,6 +199,36 @@ def special_reg(request): return X, y +def test_default_parameters(): + reg_params = curfr().get_params() + clf_params = curfc().get_params() + + # Different default max_features + assert reg_params["max_features"] == 1.0 + assert clf_params["max_features"] == "sqrt" + + # Different default split_criterion + assert reg_params["split_criterion"] == 2 + assert clf_params["split_criterion"] == 0 + + # Different accuracy_metric + assert reg_params["accuracy_metric"] == "r2" + assert clf_params["accuracy_metric"] is None + + # Drop differing params + for name in [ + "max_features", + "split_criterion", + "accuracy_metric", + "handle", + ]: + reg_params.pop(name) + clf_params.pop(name) + + # The rest are the same + assert reg_params == clf_params + + @pytest.mark.parametrize("max_depth", [2, 4]) @pytest.mark.parametrize( "split_criterion", ["poisson", "gamma", "inverse_gaussian"]