diff --git a/autosklearn/automl.py b/autosklearn/automl.py index d8c624d06a..94d8f489d1 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -1396,6 +1396,10 @@ def predict(self, X, batch_size=None, n_jobs=1): "Predict is currently not implemented for resampling " f"strategy {self._resampling_strategy}, please call refit()." ) + elif self._disable_evaluator_output is not False: + raise NotImplementedError( + "Predict cannot be called when evaluator output is disabled." + ) if self.models_ is None or len(self.models_) == 0 or self.ensemble_ is None: self._load_models() @@ -1553,11 +1557,31 @@ def _load_models(self): else: self.ensemble_ = None - # If no ensemble is loaded, try to get the best performing model - if not self.ensemble_: + # If no ensemble is loaded, try to get the best performing model. + # This is triggered if + # 1. self._ensemble_size == 0 (see if-statement above) + # 2. if the ensemble builder crashed and no ensemble is available + # 3. if the ensemble cannot be built because of arguments passed + # by the user (disable_evaluator_output and + # resampling_strategy) + if ( + not self.ensemble_ + and not ( + self._disable_evaluator_output is True + or ( + isinstance(self._disable_evaluator_output, list) + and "model" in self._disable_evaluator_output + ) + ) + and self._resampling_strategy + not in ( + "partial-cv", + "partial-cv-iterative-fit", + ) + ): self.ensemble_ = self._load_best_individual_model() - if self.ensemble_: + if self.ensemble_ is not None: identifiers = self.ensemble_.get_selected_model_identifiers() self.models_ = self._backend.load_models_by_identifiers(identifiers) @@ -1567,39 +1591,9 @@ def _load_models(self): ) else: self.cv_models_ = None - - if len(self.models_) == 0 and self._resampling_strategy not in [ - "partial-cv", - "partial-cv-iterative-fit", - ]: - raise ValueError("No models fitted!") - - if ( - self._resampling_strategy in ["cv", "cv-iterative-fit"] - and len(self.cv_models_) == 0 - ): - raise ValueError("No models fitted!") - - elif self._disable_evaluator_output is False or ( - isinstance(self._disable_evaluator_output, list) - and "model" not in self._disable_evaluator_output - ): - model_names = self._backend.list_all_models(self._seed) - - if len(model_names) == 0 and self._resampling_strategy not in [ - "partial-cv", - "partial-cv-iterative-fit", - ]: - raise ValueError("No models fitted!") - - self.ensemble_ = None - self.models_ = [] - self.cv_models_ = None - else: - self.ensemble_ = None self.models_ = [] - self.cv_models_ = None + self.cv_models_ = [] def _load_best_individual_model(self): """ @@ -1608,11 +1602,11 @@ def _load_best_individual_model(self): by AutoML. This is a robust mechanism to be able to predict, even though no ensemble was found by ensemble builder. + It is also used to load the single best model in case + the user does not want to build an ensemble. """ # We also require that the model is fit and a task is defined - # The ensemble size must also be greater than 1, else it means - # that the user intentionally does not want an ensemble - if not self._task or self._ensemble_size < 1: + if not self._task: return None # SingleBest contains the best model found by AutoML diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py index 1c283e06e6..5e891b0a8e 100644 --- a/autosklearn/estimators.py +++ b/autosklearn/estimators.py @@ -79,7 +79,8 @@ def __init__( ensemble_size : int, optional (default=50) Number of models added to the ensemble built by *Ensemble selection from libraries of models*. Models are drawn with - replacement. If set to ``0`` no ensemble is fit. + replacement. If set to ``0`` no ensemble is fit and the single + best model is loaded. ensemble_nbest : int, optional (default=50) Only consider the ``ensemble_nbest`` models when building an @@ -526,9 +527,6 @@ def fit_ensemble( All parameters are ``None`` by default. If no other value is given, the default values which were set in a call to ``fit()`` are used. - Calling this function is only necessary if ``ensemble_size==0``, for - example when executing *auto-sklearn* in parallel. - Parameters ---------- y : array-like diff --git a/test/test_automl/test_post_fit.py b/test/test_automl/test_post_fit.py index 7cc6dafb6e..ed7882d7b4 100644 --- a/test/test_automl/test_post_fit.py +++ b/test/test_automl/test_post_fit.py @@ -1,4 +1,5 @@ from autosklearn.automl import AutoML +from autosklearn.ensembles.singlebest_ensemble import SingleBest from pytest_cases import parametrize_with_cases @@ -59,10 +60,10 @@ def test_no_ensemble(automl: AutoML) -> None: Expects ------- - * The ensemble should remain None - * The models_ should be empty + * Auto-sklearn loads a single best model + * The models_ should be of size 1 * The cv_models_ should remain None """ - assert automl.ensemble_ is None - assert automl.models_ == [] + assert isinstance(automl.ensemble_, SingleBest) + assert len(automl.models_) == 1 assert automl.cv_models_ is None