
Commit 2885692

First draft of multi-objective optimization
Co-authored-by: Katharina Eggensperger <[email protected]>
1 parent daa9ad6 commit 2885692

7 files changed (+183, -95 lines)

autosklearn/automl.py

Lines changed: 21 additions & 10 deletions
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import Any, Callable, Iterable, Mapping, Optional, Tuple
+from typing import Any, Callable, Iterable, List, Mapping, Optional, Tuple, Union
 
 import copy
 import io
@@ -210,7 +210,7 @@ def __init__(
         get_smac_object_callback: Optional[Callable] = None,
         smac_scenario_args: Optional[Mapping] = None,
         logging_config: Optional[Mapping] = None,
-        metric: Optional[Scorer] = None,
+        metric: Optional[Union[Scorer, List[Scorer], Tuple[Scorer]]] = None,
         scoring_functions: Optional[list[Scorer]] = None,
         get_trials_callback: Optional[IncorporateRunResultCallback] = None,
         dataset_compression: bool | Mapping[str, Any] = True,
@@ -265,7 +265,7 @@ def __init__(
             initial_configurations_via_metalearning
         )
 
-        self._scoring_functions = scoring_functions or {}
+        self._scoring_functions = scoring_functions or []
         self._resampling_strategy_arguments = resampling_strategy_arguments or {}
 
         # Single core, local runs should use fork to prevent the __main__ requirements
@@ -692,10 +692,14 @@ def fit(
         # defined in the estimator fit call
         if self._metric is None:
             raise ValueError("No metric given.")
-        if not isinstance(self._metric, Scorer):
-            raise ValueError(
-                "Metric must be instance of " "autosklearn.metrics.Scorer."
-            )
+        if isinstance(self._metric, (List, Tuple)):
+            for entry in self._metric:
+                if not isinstance(entry, Scorer):
+                    raise ValueError(
+                        "Metric must be instance of autosklearn.metrics.Scorer."
+                    )
+        elif not isinstance(self._metric, Scorer):
+            raise ValueError("Metric must be instance of autosklearn.metrics.Scorer.")
 
         # If no dask client was provided, we create one, so that we can
         # start a ensemble process in parallel to smbo optimize
@@ -790,7 +794,11 @@ def fit(
             backend=copy.deepcopy(self._backend),
             dataset_name=dataset_name,
             task=self._task,
-            metric=self._metric,
+            metric=(
+                self._metric[0]
+                if isinstance(self._metric, (List, Tuple))
+                else self._metric
+            ),
             ensemble_size=self._ensemble_size,
             ensemble_nbest=self._ensemble_nbest,
             max_models_on_disc=self._max_models_on_disc,
@@ -1492,7 +1500,11 @@ def fit_ensemble(
             backend=copy.deepcopy(self._backend),
             dataset_name=dataset_name if dataset_name else self._dataset_name,
             task=task if task else self._task,
-            metric=self._metric,
+            metric=(
+                self._metric[0]
+                if isinstance(self._metric, (List, Tuple))
+                else self._metric
+            ),
             ensemble_size=ensemble_size if ensemble_size else self._ensemble_size,
             ensemble_nbest=ensemble_nbest if ensemble_nbest else self._ensemble_nbest,
             max_models_on_disc=self._max_models_on_disc,
@@ -1912,7 +1924,6 @@ def show_models(self) -> dict[int, Any]:
         .. code-block:: python
 
             import sklearn.datasets
-            import sklearn.metrics
             import autosklearn.regression
 
             X, y = sklearn.datasets.load_diabetes(return_X_y=True)
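
For context, the automl.py changes above let `metric` be either a single Scorer or a list/tuple of Scorers, validate every entry, and still forward only one objective to the ensemble builder. A minimal standalone sketch of that pattern, assuming auto-sklearn is installed; `validate_metric` and `ensemble_metric` are illustrative helper names, not part of the codebase:

from typing import List, Tuple, Union

from autosklearn.metrics import Scorer


def validate_metric(metric: Union[Scorer, List[Scorer], Tuple[Scorer]]) -> None:
    # Accept a single Scorer or a sequence of Scorers; reject anything else.
    if isinstance(metric, (list, tuple)):
        for entry in metric:
            if not isinstance(entry, Scorer):
                raise ValueError("Metric must be instance of autosklearn.metrics.Scorer.")
    elif not isinstance(metric, Scorer):
        raise ValueError("Metric must be instance of autosklearn.metrics.Scorer.")


def ensemble_metric(metric: Union[Scorer, List[Scorer], Tuple[Scorer]]) -> Scorer:
    # In this draft the ensemble builder still optimizes a single objective,
    # so a list/tuple contributes only its first entry.
    return metric[0] if isinstance(metric, (list, tuple)) else metric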

autosklearn/estimators.py

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@ def __init__(
         smac_scenario_args=None,
         logging_config=None,
         metadata_directory=None,
-        metric=None,
+        metric: Optional[Union[Scorer, List[Scorer], Tuple[Scorer]]] = None,
         scoring_functions: Optional[List[Scorer]] = None,
         load_models: bool = True,
         get_trials_callback=None,
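
With the widened signature, an estimator can be handed several objectives at once. A hedged usage sketch (multi-objective support is a first draft here, so downstream behaviour such as ensemble building may still assume a single objective):

import autosklearn.classification
from autosklearn.metrics import accuracy, roc_auc

# A list of Scorers is now accepted where a single Scorer was required before.
automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=120,
    metric=[accuracy, roc_auc],
)
# automl.fit(X_train, y_train)  # X_train / y_train: any binary classification data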

autosklearn/evaluation/__init__.py

Lines changed: 37 additions & 5 deletions
@@ -85,10 +85,13 @@ def fit_predict_try_except_decorator(
     queue.close()
 
 
-def get_cost_of_crash(metric: Scorer) -> float:
+def get_cost_of_crash(
+    metric: Union[Scorer, List[Scorer], Tuple[Scorer]]
+) -> Union[float, List[float]]:
 
-    # The metric must always be defined to extract optimum/worst
-    if not isinstance(metric, Scorer):
+    if isinstance(metric, (List, Tuple)):
+        return [cast(float, get_cost_of_crash(metric_)) for metric_ in metric]
+    elif not isinstance(metric, Scorer):
         raise ValueError("The metric must be stricly be an instance of Scorer")
 
     # Autosklearn optimizes the err. This function translates
@@ -126,7 +129,7 @@ def __init__(
         resampling_strategy: Union[
             str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit
         ],
-        metric: Scorer,
+        metric: Union[Scorer, List[Scorer], Tuple[Scorer]],
         cost_for_crash: float,
         abort_on_first_run_crash: bool,
         port: int,
@@ -144,7 +147,7 @@ def __init__(
         disable_file_output: bool = False,
         init_params: Optional[Dict[str, Any]] = None,
         budget_type: Optional[str] = None,
-        ta: Optional[Callable] = None,
+        ta: Optional[Callable] = None,  # Required by SMAC's parent class
         **resampling_strategy_args: Any,
     ):
         if resampling_strategy == "holdout":
@@ -186,6 +189,7 @@ def __init__(
             par_factor=par_factor,
             cost_for_crash=self.worst_possible_result,
             abort_on_first_run_crash=abort_on_first_run_crash,
+            multi_objectives=multi_objectives,
         )
 
         self.backend = backend
@@ -550,4 +554,32 @@ def run(
 
         autosklearn.evaluation.util.empty_queue(queue)
         self.logger.info("Finished evaluating configuration %d" % config_id)
+
+        # Do some sanity checking (for multi objective)
+        if len(self.multi_objectives) > 1:
+            error = (
+                f"Returned costs {cost} does not match the number of objectives"
+                f" {len(self.multi_objectives)}."
+            )
+
+            # If dict convert to array
+            # Make sure the ordering is correct
+            if isinstance(cost, dict):
+                ordered_cost = []
+                for name in self.multi_objectives:
+                    if name not in cost:
+                        raise RuntimeError(
+                            f"Objective {name} was not found in the returned costs."
+                        )
+
+                    ordered_cost.append(cost[name])
+                cost = ordered_cost
+
+            if isinstance(cost, list):
+                if len(cost) != len(self.multi_objectives):
+                    raise RuntimeError(error)
+
+            if isinstance(cost, float):
+                raise RuntimeError(error)
+
         return status, cost, runtime, additional_run_info
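
The sanity check added to `run()` reorders a dict of costs to match the declared objectives and rejects a scalar or a list of the wrong length. A free-standing sketch of the same logic; `normalize_cost` is a hypothetical helper, not part of the codebase:

from typing import Dict, List, Union


def normalize_cost(
    cost: Union[float, List[float], Dict[str, float]],
    multi_objectives: List[str],
) -> Union[float, List[float]]:
    # Single-objective runs are passed through unchanged.
    if len(multi_objectives) <= 1:
        return cost

    error = (
        f"Returned costs {cost} does not match the number of objectives"
        f" {len(multi_objectives)}."
    )
    if isinstance(cost, dict):
        # Reorder by objective name so positions match multi_objectives.
        for name in multi_objectives:
            if name not in cost:
                raise RuntimeError(f"Objective {name} was not found in the returned costs.")
        cost = [cost[name] for name in multi_objectives]
    if isinstance(cost, list) and len(cost) != len(multi_objectives):
        raise RuntimeError(error)
    if isinstance(cost, float):
        raise RuntimeError(error)
    return cost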

autosklearn/evaluation/abstract_evaluator.py

Lines changed: 21 additions & 10 deletions
@@ -184,7 +184,7 @@ def __init__(
         self,
         backend: Backend,
         queue: multiprocessing.Queue,
-        metric: Scorer,
+        metric: Union[Scorer, List[Scorer], Tuple[Scorer]],
         additional_components: Dict[str, ThirdPartyComponents],
         port: Optional[int],
         configuration: Optional[Union[int, Configuration]] = None,
@@ -326,7 +326,6 @@ def _loss(
         self,
         y_true: np.ndarray,
         y_hat: np.ndarray,
-        scoring_functions: Optional[List[Scorer]] = None,
     ) -> Union[float, Dict[str, float]]:
         """Auto-sklearn follows a minimization goal.
         The calculate_loss internally translate a score function to
@@ -338,21 +337,30 @@ def _loss(
         ----------
         y_true
         """
-        scoring_functions = (
-            self.scoring_functions if scoring_functions is None else scoring_functions
-        )
         if not isinstance(self.configuration, Configuration):
-            if scoring_functions:
-                return {self.metric.name: self.metric._worst_possible_result}
+            if self.scoring_functions:
+                if isinstance(self.metric, Scorer):
+                    return {self.metric.name: self.metric._worst_possible_result}
+                else:
+                    return {
+                        metric.name: metric._worst_possible_result
+                        for metric in self.metric
+                    }
             else:
-                return self.metric._worst_possible_result
+                if isinstance(self.metric, Scorer):
+                    return self.metric._worst_possible_result
+                else:
+                    return {
+                        metric.name: metric._worst_possible_result
+                        for metric in self.metric
+                    }
 
         return calculate_loss(
             y_true,
             y_hat,
             self.task_type,
             self.metric,
-            scoring_functions=scoring_functions,
+            scoring_functions=self.scoring_functions,
         )
 
     def finish_up(
@@ -402,7 +410,10 @@ def finish_up(
 
         if isinstance(loss, dict):
             loss_ = loss
-            loss = loss_[self.metric.name]
+            if isinstance(self.metric, Scorer):
+                loss = loss_[self.metric.name]
+            else:
+                loss = {metric: loss_[metric] for metric in loss_}
         else:
             loss_ = {}
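
The `_loss` fallback above returns the worst possible result when no real configuration was evaluated: a scalar (or a single-entry dict when extra scoring functions are requested) for one Scorer, and a dict keyed by metric name for several. A small sketch of that branch in isolation; `worst_losses` is a hypothetical helper, not part of the codebase:

from typing import Dict, List, Tuple, Union

from autosklearn.metrics import Scorer


def worst_losses(
    metric: Union[Scorer, List[Scorer], Tuple[Scorer]],
    as_dict: bool,
) -> Union[float, Dict[str, float]]:
    if isinstance(metric, Scorer):
        if as_dict:
            return {metric.name: metric._worst_possible_result}
        return metric._worst_possible_result
    # Multiple objectives: always report one worst value per metric.
    return {m.name: m._worst_possible_result for m in metric}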

autosklearn/evaluation/train_evaluator.py

Lines changed: 27 additions & 15 deletions
@@ -182,7 +182,7 @@ def __init__(
         self,
         backend: Backend,
         queue: multiprocessing.Queue,
-        metric: Scorer,
+        metric: Union[Scorer, List[Scorer], Tuple[Scorer]],
         additional_components: Dict[str, ThirdPartyComponents],
         port: Optional[int],
         configuration: Optional[Union[int, Configuration]] = None,
@@ -645,19 +645,31 @@ def fit_predict_and_loss(self, iterative: bool = False) -> None:
             # train_losses is a list of either scalars or dicts. If it contains dicts,
             # then train_loss is computed using the target metric (self.metric).
             if all(isinstance(elem, dict) for elem in train_losses):
-                train_loss = np.average(
-                    [
-                        train_losses[i][str(self.metric)]
-                        for i in range(self.num_cv_folds)
-                    ],
-                    weights=train_fold_weights,
-                )
+                if isinstance(self.metric, Scorer):
+                    train_loss = np.average(
+                        [
+                            train_losses[i][str(self.metric)]
+                            for i in range(self.num_cv_folds)
+                        ],
+                        weights=train_fold_weights,
+                    )
+                else:
+                    train_loss = [
+                        np.average(
+                            [
+                                train_losses[i][str(metric)]
+                                for i in range(self.num_cv_folds)
+                            ],
+                            weights=train_fold_weights,
+                        )
+                        for metric in self.metric
+                    ]
             else:
                 train_loss = np.average(train_losses, weights=train_fold_weights)
 
             # if all_scoring_function is true, return a dict of opt_loss. Otherwise,
             # return a scalar.
-            if self.scoring_functions:
+            if self.scoring_functions or not isinstance(self.metric, Scorer):
                 opt_loss = {}
                 for metric in opt_losses[0].keys():
                     opt_loss[metric] = np.average(
@@ -1316,7 +1328,7 @@ def eval_holdout(
         str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit
     ],
     resampling_strategy_args: Dict[str, Optional[Union[float, int, str]]],
-    metric: Scorer,
+    metric: Union[Scorer, List[Scorer], Tuple[Scorer]],
     seed: int,
     num_run: int,
     instance: str,
@@ -1363,7 +1375,7 @@ def eval_iterative_holdout(
         str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit
     ],
    resampling_strategy_args: Dict[str, Optional[Union[float, int, str]]],
-    metric: Scorer,
+    metric: Union[Scorer, List[Scorer], Tuple[Scorer]],
     seed: int,
     num_run: int,
     instance: str,
@@ -1410,7 +1422,7 @@ def eval_partial_cv(
         str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit
     ],
     resampling_strategy_args: Dict[str, Optional[Union[float, int, str]]],
-    metric: Scorer,
+    metric: Union[Scorer, List[Scorer], Tuple[Scorer]],
     seed: int,
     num_run: int,
     instance: str,
@@ -1463,7 +1475,7 @@ def eval_partial_cv_iterative(
         str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit
     ],
     resampling_strategy_args: Dict[str, Optional[Union[float, int, str]]],
-    metric: Scorer,
+    metric: Union[Scorer, List[Scorer], Tuple[Scorer]],
     seed: int,
     num_run: int,
     instance: str,
@@ -1511,7 +1523,7 @@ def eval_cv(
         str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit
     ],
     resampling_strategy_args: Dict[str, Optional[Union[float, int, str]]],
-    metric: Scorer,
+    metric: Union[Scorer, List[Scorer], Tuple[Scorer]],
     seed: int,
     num_run: int,
     instance: str,
@@ -1559,7 +1571,7 @@ def eval_iterative_cv(
         str, BaseCrossValidator, _RepeatedSplits, BaseShuffleSplit
     ],
     resampling_strategy_args: Dict[str, Optional[Union[float, int, str]]],
-    metric: Scorer,
+    metric: Union[Scorer, List[Scorer], Tuple[Scorer]],
     seed: int,
     num_run: int,
     instance: str,
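
In the multi-objective branch of `fit_predict_and_loss` above, the per-fold loss dicts are averaged independently per metric using the fold weights, yielding one aggregated loss per objective. A sketch with made-up numbers; `average_fold_losses` is a hypothetical helper, not part of the codebase:

from typing import Dict, List

import numpy as np


def average_fold_losses(
    fold_losses: List[Dict[str, float]],  # one dict of losses per CV fold
    fold_weights: List[float],            # e.g. proportional to fold sizes
    metric_names: List[str],              # objectives, in a fixed order
) -> List[float]:
    return [
        float(np.average([fold[name] for fold in fold_losses], weights=fold_weights))
        for name in metric_names
    ]


# Example: two folds, two objectives.
losses = [{"accuracy": 0.10, "log_loss": 0.40}, {"accuracy": 0.20, "log_loss": 0.60}]
print(average_fold_losses(losses, [0.5, 0.5], ["accuracy", "log_loss"]))  # [0.15, 0.5]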
