Skip to content

Commit 72953a8

Browse files
authored
Fix RandomForestRegressor default max_features (#6862)
This was a bug introduced with the removal of `max_features="auto"`. The default value for `RandomForestRegressor` should be `1.0`, while the default for `RandomForestClassifier` should be `"sqrt"`. Both were erroneously set to `"sqrt"`.

Authors:
- Jim Crist-Harif (https://github.com/jcrist)

Approvers:
- Philip Hyunsu Cho (https://github.com/hcho3)
- Simon Adorf (https://github.com/csadorf)

URL: #6862
1 parent 56994e8 commit 72953a8

4 files changed

Lines changed: 36 additions & 2 deletions

File tree

python/cuml/cuml/accel/tests/scikit-learn/xfail-list.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,7 @@
475475
- "sklearn.ensemble.tests.test_forest::test_min_weight_fraction_leaf[RandomForestRegressor]"
476476
- "sklearn.ensemble.tests.test_forest::test_missing_value_is_predictive[RandomForestClassifier]"
477477
- "sklearn.ensemble.tests.test_forest::test_missing_value_is_predictive[RandomForestRegressor]"
478+
- "sklearn.ensemble.tests.test_forest::test_missing_values_is_resilient[make_regression-RandomForestRegressor]"
478479
- "sklearn.ensemble.tests.test_forest::test_mse_criterion_object_segfault_smoke_test[RandomForestRegressor]"
479480
- "sklearn.ensemble.tests.test_forest::test_multioutput[RandomForestClassifier]"
480481
- "sklearn.ensemble.tests.test_forest::test_multioutput[RandomForestRegressor]"
@@ -484,7 +485,6 @@
484485
- "sklearn.ensemble.tests.test_forest::test_poisson_y_positive_check"
485486
- "sklearn.ensemble.tests.test_forest::test_probability[RandomForestClassifier]"
486487
- "sklearn.ensemble.tests.test_forest::test_regression_criterion[friedman_mse-RandomForestRegressor]"
487-
- "sklearn.ensemble.tests.test_forest::test_regression_criterion[squared_error-RandomForestRegressor]"
488488
- "sklearn.ensemble.tests.test_forest::test_sparse_input[coo_array-RandomForestClassifier]"
489489
- "sklearn.ensemble.tests.test_forest::test_sparse_input[coo_array-RandomForestRegressor]"
490490
- "sklearn.ensemble.tests.test_forest::test_sparse_input[coo_matrix-RandomForestClassifier]"

python/cuml/cuml/ensemble/randomforestregressor.pyx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ class RandomForestRegressor(BaseRandomForestModel,
266266
@device_interop_preparation
267267
def __init__(self, *,
268268
split_criterion=2,
269+
max_features=1.0,
269270
accuracy_metric='r2',
270271
handle=None,
271272
verbose=False,
@@ -274,6 +275,7 @@ class RandomForestRegressor(BaseRandomForestModel,
274275
self.RF_type = REGRESSION
275276
super().__init__(
276277
split_criterion=split_criterion,
278+
max_features=max_features,
277279
accuracy_metric=accuracy_metric,
278280
handle=handle,
279281
verbose=verbose,

python/cuml/cuml/tests/explainer/test_explainer_kernel_shap.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,9 @@ def test_kernel_housing_dataset(housing_dataset):
219219
y_train = y_train.astype(np.float32)
220220
y_test = y_test.astype(np.float32)
221221

222-
cumodel = cuml.RandomForestRegressor().fit(X_train, y_train)
222+
cumodel = cuml.RandomForestRegressor(max_features="sqrt").fit(
223+
X_train, y_train
224+
)
223225

224226
explainer = KernelExplainer(
225227
model=cumodel.predict, data=X_train[:100], output_type="numpy"

python/cuml/cuml/tests/test_random_forest.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,36 @@ def special_reg(request):
199199
return X, y
200200

201201

202+
def test_default_parameters():
203+
reg_params = curfr().get_params()
204+
clf_params = curfc().get_params()
205+
206+
# Different default max_features
207+
assert reg_params["max_features"] == 1.0
208+
assert clf_params["max_features"] == "sqrt"
209+
210+
# Different default split_criterion
211+
assert reg_params["split_criterion"] == 2
212+
assert clf_params["split_criterion"] == 0
213+
214+
# Different accuracy_metric
215+
assert reg_params["accuracy_metric"] == "r2"
216+
assert clf_params["accuracy_metric"] is None
217+
218+
# Drop differing params
219+
for name in [
220+
"max_features",
221+
"split_criterion",
222+
"accuracy_metric",
223+
"handle",
224+
]:
225+
reg_params.pop(name)
226+
clf_params.pop(name)
227+
228+
# The rest are the same
229+
assert reg_params == clf_params
230+
231+
202232
@pytest.mark.parametrize("max_depth", [2, 4])
203233
@pytest.mark.parametrize(
204234
"split_criterion", ["poisson", "gamma", "inverse_gaussian"]

0 commit comments

Comments
 (0)