Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 31 additions & 37 deletions autosklearn/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -1396,6 +1396,10 @@ def predict(self, X, batch_size=None, n_jobs=1):
"Predict is currently not implemented for resampling "
f"strategy {self._resampling_strategy}, please call refit()."
)
elif self._disable_evaluator_output is not False:
raise NotImplementedError(
"Predict cannot be called when evaluator output is disabled."
)

if self.models_ is None or len(self.models_) == 0 or self.ensemble_ is None:
self._load_models()
Expand Down Expand Up @@ -1553,11 +1557,31 @@ def _load_models(self):
else:
self.ensemble_ = None

# If no ensemble is loaded, try to get the best performing model
if not self.ensemble_:
# If no ensemble is loaded, try to get the best performing model.
# This is triggered if
# 1. self._ensemble_size == 0 (see if-statement above),
# 2. the ensemble builder crashed and no ensemble is available, or
# 3. the ensemble cannot be built because of arguments passed
# by the user (disable_evaluator_output and
# resampling_strategy).
if (
not self.ensemble_
and not (
self._disable_evaluator_output is True
or (
isinstance(self._disable_evaluator_output, list)
and "model" in self._disable_evaluator_output
)
)
and self._resampling_strategy
not in (
"partial-cv",
"partial-cv-iterative-fit",
)
):
self.ensemble_ = self._load_best_individual_model()

if self.ensemble_:
if self.ensemble_ is not None:
identifiers = self.ensemble_.get_selected_model_identifiers()
self.models_ = self._backend.load_models_by_identifiers(identifiers)

Expand All @@ -1567,39 +1591,9 @@ def _load_models(self):
)
else:
self.cv_models_ = None

if len(self.models_) == 0 and self._resampling_strategy not in [
"partial-cv",
"partial-cv-iterative-fit",
]:
raise ValueError("No models fitted!")

if (
self._resampling_strategy in ["cv", "cv-iterative-fit"]
and len(self.cv_models_) == 0
):
raise ValueError("No models fitted!")

elif self._disable_evaluator_output is False or (
isinstance(self._disable_evaluator_output, list)
and "model" not in self._disable_evaluator_output
):
model_names = self._backend.list_all_models(self._seed)

if len(model_names) == 0 and self._resampling_strategy not in [
"partial-cv",
"partial-cv-iterative-fit",
]:
raise ValueError("No models fitted!")

self.ensemble_ = None
self.models_ = []
self.cv_models_ = None

else:
self.ensemble_ = None
self.models_ = []
self.cv_models_ = None
self.cv_models_ = []

def _load_best_individual_model(self):
"""
Expand All @@ -1608,11 +1602,11 @@ def _load_best_individual_model(self):
by AutoML.
This is a robust mechanism that makes prediction possible
even if no ensemble was found by the ensemble builder.
It is also used to load the single best model in case
the user does not want to build an ensemble.
"""
# We also require that the model is fit and a task is defined
# The ensemble size must also be greater than 1, else it means
# that the user intentionally does not want an ensemble
if not self._task or self._ensemble_size < 1:
if not self._task:
return None

# SingleBest contains the best model found by AutoML
Expand Down
6 changes: 2 additions & 4 deletions autosklearn/estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ def __init__(
ensemble_size : int, optional (default=50)
Number of models added to the ensemble built by *Ensemble
selection from libraries of models*. Models are drawn with
replacement. If set to ``0`` no ensemble is fit.
replacement. If set to ``0``, no ensemble is fit and the single
best model is loaded.

ensemble_nbest : int, optional (default=50)
Only consider the ``ensemble_nbest`` models when building an
Expand Down Expand Up @@ -526,9 +527,6 @@ def fit_ensemble(
All parameters are ``None`` by default. If no other value is given,
the default values which were set in a call to ``fit()`` are used.

Calling this function is only necessary if ``ensemble_size==0``, for
example when executing *auto-sklearn* in parallel.

Parameters
----------
y : array-like
Expand Down
9 changes: 5 additions & 4 deletions test/test_automl/test_post_fit.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from autosklearn.automl import AutoML
from autosklearn.ensembles.singlebest_ensemble import SingleBest

from pytest_cases import parametrize_with_cases

Expand Down Expand Up @@ -59,10 +60,10 @@ def test_no_ensemble(automl: AutoML) -> None:

Expects
-------
* The ensemble should remain None
* The models_ should be empty
* Auto-sklearn loads a single best model
* The models_ should be of size 1
* The cv_models_ should remain None
"""
assert automl.ensemble_ is None
assert automl.models_ == []
assert isinstance(automl.ensemble_, SingleBest)
assert len(automl.models_) == 1
assert automl.cv_models_ is None